Statistics
| Branch: | Revision:

ffmpeg / libavcodec / libpostproc / postprocess.c @ a7b2871c

History | View | Annotate | Download (32.1 KB)

1
/*
2
    Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3

4
    AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5

6
    This program is free software; you can redistribute it and/or modify
7
    it under the terms of the GNU General Public License as published by
8
    the Free Software Foundation; either version 2 of the License, or
9
    (at your option) any later version.
10

11
    This program is distributed in the hope that it will be useful,
12
    but WITHOUT ANY WARRANTY; without even the implied warranty of
13
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14
    GNU General Public License for more details.
15

16
    You should have received a copy of the GNU General Public License
17
    along with this program; if not, write to the Free Software
18
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
19
*/
20

    
21
/**
22
 * @file postprocess.c
23
 * postprocessing.
24
 */
25
 
26
/*
27
                        C        MMX        MMX2        3DNow        AltiVec
28
isVertDC                Ec        Ec                        Ec
29
isVertMinMaxOk                Ec        Ec                        Ec
30
doVertLowPass                E                e        e        Ec
31
doVertDefFilter                Ec        Ec        e        e        Ec
32
isHorizDC                Ec        Ec
33
isHorizMinMaxOk                a        E
34
doHorizLowPass                E                e        e
35
doHorizDefFilter        Ec        Ec        e        e
36
do_a_deblock                Ec        E        Ec        E
37
deRing                        E                e        e*        Ecp
38
Vertical RKAlgo1        E                a        a
39
Horizontal RKAlgo1                        a        a
40
Vertical X1#                a                E        E
41
Horizontal X1#                a                E        E
42
LinIpolDeinterlace        e                E        E*
43
CubicIpolDeinterlace        a                e        e*
44
LinBlendDeinterlace        e                E        E*
45
MedianDeinterlace#        E        Ec        Ec
46
TempDeNoiser#                E                e        e
47

48
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
49
# more or less self-invented filters, so the exactness isn't too meaningful
50
E = Exact implementation
51
e = almost exact implementation (slightly different rounding,...)
52
a = alternative / approximate impl
53
c = checked against the other implementations (-vo md5)
54
p = partially optimized, still some work to do
55
*/
56

    
57
/*
58
TODO:
59
reduce the time wasted on the mem transfer
60
unroll stuff if instructions depend too much on the prior one
61
move YScale thing to the end instead of fixing QP
62
write a faster and higher quality deblocking filter :)
63
make the mainloop more flexible (variable number of blocks at once
64
        (the if/else stuff per block is slowing things down)
65
compare the quality & speed of all filters
66
split this huge file
67
optimize c versions
68
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
69
...
70
*/
71

    
72
//Changelog: use the CVS log
73

    
74
#include "config.h"
75
#include <inttypes.h>
76
#include <stdio.h>
77
#include <stdlib.h>
78
#include <string.h>
79
#ifdef HAVE_MALLOC_H
80
#include <malloc.h>
81
#endif
82
//#undef HAVE_MMX2
83
//#define HAVE_3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#ifdef USE_FASTMEMCPY
88
#include "fastmemcpy.h"
89
#endif
90
#include "postprocess.h"
91
#include "postprocess_internal.h"
92

    
93
#include "mangle.h" //FIXME should be supressed
94

    
95
#ifdef HAVE_ALTIVEC_H
96
#include <altivec.h>
97
#endif
98

    
99
/* Fallback when the platform has no memalign(): the alignment argument is
 * dropped and plain malloc() is used, so the result only has malloc's
 * default alignment. */
#ifndef HAVE_MEMALIGN
#define memalign(a,b) malloc(b)
#endif
102

    
103
/* Small arithmetic helpers.
 * NOTE: these are plain macros, each argument may be evaluated twice, so
 * never pass expressions with side effects.
 * SIGN(0) yields -1 (there is no zero result). */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
#define MAX(a,b) ((a) < (b) ? (b) : (a))
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
#define SIGN(a) ((a) > 0 ? 1 : -1)
107

    
108
/* Compile-time sizes used throughout this file. */
#define GET_MODE_BUFFER_SIZE 500
#define OPTIONS_ARRAY_SIZE 10
#define BLOCK_SIZE 8            /* width / height of one processed block */
#define TEMP_STRIDE 8
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
113

    
114
/* attribute_used : keeps an otherwise unreferenced static object from being
 *                  discarded (such objects are only referenced from asm).
 * always_inline  : forces inlining where the compiler supports it.
 * Both degrade to nothing / plain inline on compilers other than gcc > 3.0. */
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))
#    define attribute_used __attribute__((used))
#    define always_inline __attribute__((always_inline)) inline
#else
#    define attribute_used
#    define always_inline inline
#endif
121

    
122
#ifdef ARCH_X86
/* 8-byte aligned packed constants.
 * wNN: the 16-bit value 0xNN repeated four times,
 * bNN: the byte 0xNN repeated eight times.
 * They are marked attribute_used because nothing in C references them here. */
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
#endif
132

    
133
static uint8_t clip_table[3*256];
134
static uint8_t * const clip_tab= clip_table + 256;
135

    
136
static const int verbose= 0;
137

    
138
static const int attribute_used deringThreshold= 20;
139

    
140

    
141
static struct PPFilter filters[]=
142
{
143
        {"hb", "hdeblock",                 1, 1, 3, H_DEBLOCK},
144
        {"vb", "vdeblock",                 1, 2, 4, V_DEBLOCK},
145
/*        {"hr", "rkhdeblock",                 1, 1, 3, H_RK1_FILTER},
146
        {"vr", "rkvdeblock",                 1, 2, 4, V_RK1_FILTER},*/
147
        {"h1", "x1hdeblock",                 1, 1, 3, H_X1_FILTER},
148
        {"v1", "x1vdeblock",                 1, 2, 4, V_X1_FILTER},
149
        {"ha", "ahdeblock",                 1, 1, 3, H_A_DEBLOCK},
150
        {"va", "avdeblock",                 1, 2, 4, V_A_DEBLOCK},
151
        {"dr", "dering",                 1, 5, 6, DERING},
152
        {"al", "autolevels",                 0, 1, 2, LEVEL_FIX},
153
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
154
        {"li", "linipoldeint",                 1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
155
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
156
        {"md", "mediandeint",                 1, 1, 4, MEDIAN_DEINT_FILTER},
157
        {"fd", "ffmpegdeint",                 1, 1, 4, FFMPEG_DEINT_FILTER},
158
        {"l5", "lowpass5",                 1, 1, 4, LOWPASS5_DEINT_FILTER},
159
        {"tn", "tmpnoise",                 1, 7, 8, TEMP_NOISE_FILTER},
160
        {"fq", "forcequant",                 1, 0, 0, FORCE_QUANT},
161
        {NULL, NULL,0,0,0,0} //End Marker
162
};
163

    
164
/* Alias table stored as consecutive (name, expansion) string pairs and
 * terminated by a single NULL. Each alias expands to a filter list. */
static char *replaceTable[]=
{
        "default", "hdeblock:a,vdeblock:a,dering:a",
        "de",      "hdeblock:a,vdeblock:a,dering:a",
        "fast",    "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",      "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",      "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
173

    
174

    
175
#ifdef ARCH_X86
176
/**
 * Issue an x86 "prefetchnta" instruction for the address p.
 * Uses the __asm__ spelling so the file also builds in strict ISO modes
 * (-std=c99/-std=c11), where plain "asm" is not a keyword.
 */
static inline void prefetchnta(void *p)
{
        __asm__ __volatile__(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}
182

    
183
/**
 * Issue an x86 "prefetcht0" instruction for the address p.
 * Uses the __asm__ spelling so the file also builds in strict ISO modes
 * (-std=c99/-std=c11), where plain "asm" is not a keyword.
 */
static inline void prefetcht0(void *p)
{
        __asm__ __volatile__(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}
189

    
190
/**
 * Issue an x86 "prefetcht1" instruction for the address p.
 * Uses the __asm__ spelling so the file also builds in strict ISO modes
 * (-std=c99/-std=c11), where plain "asm" is not a keyword.
 */
static inline void prefetcht1(void *p)
{
        __asm__ __volatile__(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}
196

    
197
/**
 * Issue an x86 "prefetcht2" instruction for the address p.
 * Uses the __asm__ spelling so the file also builds in strict ISO modes
 * (-std=c99/-std=c11), where plain "asm" is not a keyword.
 */
static inline void prefetcht2(void *p)
{
        __asm__ __volatile__(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
203
#endif
204

    
205
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
206

    
207
/**
208
 * Check if the given 8x8 Block is mostly "flat"
209
 */
210
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
211
{
212
        int numEq= 0;
213
        int y;
214
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
215
        const int dcThreshold= dcOffset*2 + 1;
216

    
217
        for(y=0; y<BLOCK_SIZE; y++)
218
        {
219
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
220
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
221
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
222
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
223
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
224
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
226
                src+= stride;
227
        }
228
        return numEq > c->ppMode.flatnessThreshold;
229
}
230

    
231
/**
232
 * Check if the middle 8x8 Block in the given 8x16 block is flat
233
 */
234
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
235
        int numEq= 0;
236
        int y;
237
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
238
        const int dcThreshold= dcOffset*2 + 1;
239

    
240
        src+= stride*4; // src points to begin of the 8x8 Block
241
        for(y=0; y<BLOCK_SIZE-1; y++)
242
        {
243
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
244
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
245
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
246
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
247
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
248
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
249
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
250
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
251
                src+= stride;
252
        }
253
        return numEq > c->ppMode.flatnessThreshold;
254
}
255

    
256
/**
 * Cheap range check on an 8x8 block.
 *
 * Only one pixel pair per row is sampled; the column pairs (0,5), (2,7),
 * (4,1), (6,3) are used on four consecutive rows and then repeated for the
 * next four rows.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer; a sampled pair may differ by at most 2*QP
 * @return 1 if every sampled pair is within +-2*QP, otherwise 0
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const unsigned char pairs[4][2]= { {0,5}, {2,7}, {4,1}, {6,3} };
        int row;

        for(row=0; row<8; row++){
                const unsigned char *p= pairs[row & 3];

                // the unsigned cast folds the two-sided test -2*QP <= diff <= 2*QP into one compare
                if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > 4*QP) return 0;
                src += stride;
        }
        return 1;
}
278

    
279
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
280
{
281
#if 1
282
#if 1
283
        int x;
284
        src+= stride*4;
285
        for(x=0; x<BLOCK_SIZE; x+=4)
286
        {
287
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
288
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
289
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
290
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
291
        }
292
#else
293
        int x;
294
        src+= stride*3;
295
        for(x=0; x<BLOCK_SIZE; x++)
296
        {
297
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
298
        }
299
#endif
300
        return 1;
301
#else
302
        int x;
303
        src+= stride*4;
304
        for(x=0; x<BLOCK_SIZE; x++)
305
        {
306
                int min=255;
307
                int max=0;
308
                int y;
309
                for(y=0; y<8; y++){
310
                        int v= src[x + y*stride];
311
                        if(v>max) max=v;
312
                        if(v<min) min=v;
313
                }
314
                if(max-min > 2*QP) return 0;
315
        }
316
        return 1;
317
#endif
318
}
319

    
320
/**
 * Classify a block for horizontal deblocking.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it for c->QP,
 *         0 if it is flat but the min/max check fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
330

    
331
/**
 * Classify a block for vertical deblocking.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it for c->QP,
 *         0 if it is flat but the min/max check fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
341

    
342
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows only the two pixels dst[3] and dst[4] are
 * modified. A row is left untouched when the "middle energy" measured
 * around those pixels is not below 8*c->QP.
 *
 * @param dst    first pixel of the 8 pixel wide row window; modified in place
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int y;
        for(y=0; y<BLOCK_SIZE; y++, dst+= stride)
        {
                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                int q, leftEnergy, rightEnergy, d;

                if(ABS(middleEnergy) >= 8*c->QP)
                        continue;

                q= (dst[3] - dst[4])/2;
                leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                // correction strength: how much the middle step exceeds the weaker neighbouring step
                d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
                d= MAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= SIGN(-middleEnergy);

                // limit d to the range between 0 and q (half the step across the edge)
                if(q>0)
                {
                        d= d<0 ? 0 : d;
                        d= d>q ? q : d;
                }
                else
                {
                        d= d>0 ? 0 : d;
                        d= d<q ? q : d;
                }

                dst[3]-= d;
                dst[4]+= d;
        }
}
378

    
379
/**
380
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
381
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
382
 */
383
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
384
{
385
        int y;
386
        for(y=0; y<BLOCK_SIZE; y++)
387
        {
388
                const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
389
                const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
390

    
391
                int sums[10];
392
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
393
                sums[1] = sums[0] - first  + dst[3];
394
                sums[2] = sums[1] - first  + dst[4];
395
                sums[3] = sums[2] - first  + dst[5];
396
                sums[4] = sums[3] - first  + dst[6];
397
                sums[5] = sums[4] - dst[0] + dst[7];
398
                sums[6] = sums[5] - dst[1] + last;
399
                sums[7] = sums[6] - dst[2] + last;
400
                sums[8] = sums[7] - dst[3] + last;
401
                sums[9] = sums[8] - dst[4] + last;
402

    
403
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
404
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
405
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
406
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
407
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
408
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
409
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
410
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
411

    
412
                dst+= stride;
413
        }
414
}
415

    
416
/**
417
 * Experimental Filter 1 (Horizontal)
418
 * will not damage linear gradients
419
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
420
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
421
 * MMX2 version does correct clipping C version doesnt
422
 * not identical with the vertical one
423
 */
424
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
425
{
426
        int y;
427
        static uint64_t *lut= NULL;
428
        if(lut==NULL)
429
        {
430
                int i;
431
                lut= (uint64_t*)memalign(8, 256*8);
432
                for(i=0; i<256; i++)
433
                {
434
                        int v= i < 128 ? 2*i : 2*(i-256);
435
/*
436
//Simulate 112242211 9-Tap filter
437
                        uint64_t a= (v/16) & 0xFF;
438
                        uint64_t b= (v/8) & 0xFF;
439
                        uint64_t c= (v/4) & 0xFF;
440
                        uint64_t d= (3*v/8) & 0xFF;
441
*/
442
//Simulate piecewise linear interpolation
443
                        uint64_t a= (v/16) & 0xFF;
444
                        uint64_t b= (v*3/16) & 0xFF;
445
                        uint64_t c= (v*5/16) & 0xFF;
446
                        uint64_t d= (7*v/16) & 0xFF;
447
                        uint64_t A= (0x100 - a)&0xFF;
448
                        uint64_t B= (0x100 - b)&0xFF;
449
                        uint64_t C= (0x100 - c)&0xFF;
450
                        uint64_t D= (0x100 - c)&0xFF;
451

    
452
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
453
                                (D<<24) | (C<<16) | (B<<8) | (A);
454
                        //lut[i] = (v<<32) | (v<<24);
455
                }
456
        }
457

    
458
        for(y=0; y<BLOCK_SIZE; y++)
459
        {
460
                int a= src[1] - src[2];
461
                int b= src[3] - src[4];
462
                int c= src[5] - src[6];
463

    
464
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
465

    
466
                if(d < QP)
467
                {
468
                        int v = d * SIGN(-b);
469

    
470
                        src[1] +=v/8;
471
                        src[2] +=v/4;
472
                        src[3] +=3*v/8;
473
                        src[4] -=3*v/8;
474
                        src[5] -=v/4;
475
                        src[6] -=v/8;
476

    
477
                }
478
                src+=stride;
479
        }
480
}
481

    
482
/**
483
 * accurate deblock filter
484
 */
485
static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
486
        int y;
487
        const int QP= c->QP;
488
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
489
        const int dcThreshold= dcOffset*2 + 1;
490
//START_TIMER
491
        src+= step*4; // src points to begin of the 8x8 Block
492
        for(y=0; y<8; y++){
493
                int numEq= 0;
494

    
495
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
496
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
497
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
498
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
499
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
500
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
501
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
502
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
503
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
504
                if(numEq > c->ppMode.flatnessThreshold){
505
                        int min, max, x;
506
                        
507
                        if(src[0] > src[step]){
508
                            max= src[0];
509
                            min= src[step];
510
                        }else{
511
                            max= src[step];
512
                            min= src[0];
513
                        }
514
                        for(x=2; x<8; x+=2){
515
                                if(src[x*step] > src[(x+1)*step]){
516
                                        if(src[x    *step] > max) max= src[ x   *step];
517
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
518
                                }else{
519
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
520
                                        if(src[ x   *step] < min) min= src[ x   *step];
521
                                }
522
                        }
523
                        if(max-min < 2*QP){
524
                                const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
525
                                const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
526
                                
527
                                int sums[10];
528
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
529
                                sums[1] = sums[0] - first       + src[3*step];
530
                                sums[2] = sums[1] - first       + src[4*step];
531
                                sums[3] = sums[2] - first       + src[5*step];
532
                                sums[4] = sums[3] - first       + src[6*step];
533
                                sums[5] = sums[4] - src[0*step] + src[7*step];
534
                                sums[6] = sums[5] - src[1*step] + last;
535
                                sums[7] = sums[6] - src[2*step] + last;
536
                                sums[8] = sums[7] - src[3*step] + last;
537
                                sums[9] = sums[8] - src[4*step] + last;
538

    
539
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
540
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
541
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
542
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
543
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
544
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
545
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
546
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
547
                        }
548
                }else{
549
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
550

    
551
                        if(ABS(middleEnergy) < 8*QP)
552
                        {
553
                                const int q=(src[3*step] - src[4*step])/2;
554
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
555
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
556

    
557
                                int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
558
                                d= MAX(d, 0);
559
        
560
                                d= (5*d + 32) >> 6;
561
                                d*= SIGN(-middleEnergy);
562
        
563
                                if(q>0)
564
                                {
565
                                        d= d<0 ? 0 : d;
566
                                        d= d>q ? q : d;
567
                                }
568
                                else
569
                                {
570
                                        d= d>0 ? 0 : d;
571
                                        d= d<q ? q : d;
572
                                }
573
        
574
                                src[3*step]-= d;
575
                                src[4*step]+= d;
576
                        }
577
                }
578

    
579
                src += stride;
580
        }
581
/*if(step==16){
582
    STOP_TIMER("step16")
583
}else{
584
    STOP_TIMER("stepX")
585
}*/
586
}
587

    
588
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
589
//Plain C versions
590
/* Step 1: decide from the configure-time macros which variants of
 * postprocess_template.c get compiled into this file. */
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#ifndef CONFIG_DARWIN
#warning "################################################################################"
#warning  "WARNING: No gcc available as of today (2004-05-25) seems to be able to compile properly some of the code under non-Darwin PPC OSes. Some functions result in wrong results, while others simply won't compile (gcc explodes after allocating 1GiB+)."
#warning "################################################################################"
#endif //CONFIG_DARWIN
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#ifdef ARCH_X86

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif //ARCH_X86

/* Step 2: clear the feature macros. From here on they no longer describe
 * the build configuration; each section below sets them to what the
 * template variant it includes should see. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC
#undef ARCH_X86

/* Step 3: include the template once per selected variant; RENAME() appends
 * the variant suffix to the template's function names. */
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef ARCH_X86
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define ARCH_X86
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
677

    
678
// minor note: the HAVE_xyz macros are messed up after this line, so don't rely on them
679

    
680
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
681
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
682
{
683
        PPContext *c= (PPContext *)vc;
684
        PPMode *ppMode= (PPMode *)vm;
685
        c->ppMode= *ppMode; //FIXME
686

    
687
        // useing ifs here as they are faster than function pointers allthough the
688
        // difference wouldnt be messureable here but its much better because
689
        // someone might exchange the cpu whithout restarting mplayer ;)
690
#ifdef RUNTIME_CPUDETECT
691
#ifdef ARCH_X86
692
        // ordered per speed fasterst first
693
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
694
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
695
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
696
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
697
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
698
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
699
        else
700
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
701
#else
702
#ifdef ARCH_POWERPC
703
#ifdef HAVE_ALTIVEC
704
        else if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
705
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
706
        else
707
#endif
708
#endif
709
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
710
#endif
711
#else //RUNTIME_CPUDETECT
712
#ifdef HAVE_MMX2
713
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
714
#elif defined (HAVE_3DNOW)
715
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
716
#elif defined (HAVE_MMX)
717
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
718
#elif defined (HAVE_ALTIVEC)
719
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
720
#else
721
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
722
#endif
723
#endif //!RUNTIME_CPUDETECT
724
}
725

    
726
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
727
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
728

    
729
/**
 * Help text for the -pp command line option.
 * Shows the syntax of the filter string, a few examples, and a table of the
 * filters (short and long name) with the options each of them accepts.
 */
char *pp_help=
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint        default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb                                        de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    "Filters                        Options\n"
    "short        long name        short        long option        Description\n"
    "*        *                a        autoq                CPU power dependent enabler\n"
    "                        c        chrom                chrominance filtering enabled\n"
    "                        y        nochrom                chrominance filtering disabled\n"
    "hb        hdeblock        (2 threshold)                horizontal deblocking filter\n"
    "        1. difference factor: default=32, higher -> more deblocking\n"
    "        2. flatness threshold: default=39, lower -> more deblocking\n"
    "                        the h & v deblocking filters share these\n"
    "                        so you can't set different thresholds for h / v\n"
    "vb        vdeblock        (2 threshold)                vertical deblocking filter\n"
    "ha        hadeblock        (2 threshold)                horizontal deblocking filter\n"
    "va        vadeblock        (2 threshold)                vertical deblocking filter\n"
    "h1        x1hdeblock                                experimental h deblock filter 1\n"
    "v1        x1vdeblock                                experimental v deblock filter 1\n"
    "dr        dering                                        deringing filter\n"
    "al        autolevels                                automatic brightness / contrast\n"
    "                        f        fullyrange        stretch luminance to (0..255)\n"
    "lb        linblenddeint                                linear blend deinterlacer\n"
    "li        linipoldeint                                linear interpolating deinterlace\n"
    "ci        cubicipoldeint                                cubic interpolating deinterlacer\n"
    "md        mediandeint                                median deinterlacer\n"
    "fd        ffmpegdeint                                ffmpeg deinterlacer\n"
    "de        default                                        hb:a,vb:a,dr:a\n"
    "fa        fast                                        h1:a,v1:a,dr:a\n"
    "tn        tmpnoise        (3 threshold)                temporal noise reducer\n"
    "                        1. <= 2. <= 3.                larger -> stronger filtering\n"
    "fq        forceQuant        <quantizer>                force quantizer\n";
768

    
769
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
770
{
771
        char temp[GET_MODE_BUFFER_SIZE];
772
        char *p= temp;
773
        char *filterDelimiters= ",/";
774
        char *optionDelimiters= ":";
775
        struct PPMode *ppMode;
776
        char *filterToken;
777

    
778
        ppMode= memalign(8, sizeof(PPMode));
779
        
780
        ppMode->lumMode= 0;
781
        ppMode->chromMode= 0;
782
        ppMode->maxTmpNoise[0]= 700;
783
        ppMode->maxTmpNoise[1]= 1500;
784
        ppMode->maxTmpNoise[2]= 3000;
785
        ppMode->maxAllowedY= 234;
786
        ppMode->minAllowedY= 16;
787
        ppMode->baseDcDiff= 256/8;
788
        ppMode->flatnessThreshold= 56-16-1;
789
        ppMode->maxClippedThreshold= 0.01;
790
        ppMode->error=0;
791

    
792
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
793

    
794
        if(verbose>1) printf("pp: %s\n", name);
795

    
796
        for(;;){
797
                char *filterName;
798
                int q= 1000000; //PP_QUALITY_MAX;
799
                int chrom=-1;
800
                char *option;
801
                char *options[OPTIONS_ARRAY_SIZE];
802
                int i;
803
                int filterNameOk=0;
804
                int numOfUnknownOptions=0;
805
                int enable=1; //does the user want us to enabled or disabled the filter
806

    
807
                filterToken= strtok(p, filterDelimiters);
808
                if(filterToken == NULL) break;
809
                p+= strlen(filterToken) + 1; // p points to next filterToken
810
                filterName= strtok(filterToken, optionDelimiters);
811
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
812

    
813
                if(*filterName == '-')
814
                {
815
                        enable=0;
816
                        filterName++;
817
                }
818

    
819
                for(;;){ //for all options
820
                        option= strtok(NULL, optionDelimiters);
821
                        if(option == NULL) break;
822

    
823
                        if(verbose>1) printf("pp: option: %s\n", option);
824
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
825
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
826
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
827
                        else
828
                        {
829
                                options[numOfUnknownOptions] = option;
830
                                numOfUnknownOptions++;
831
                        }
832
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
833
                }
834
                options[numOfUnknownOptions] = NULL;
835

    
836
                /* replace stuff from the replace Table */
837
                for(i=0; replaceTable[2*i]!=NULL; i++)
838
                {
839
                        if(!strcmp(replaceTable[2*i], filterName))
840
                        {
841
                                int newlen= strlen(replaceTable[2*i + 1]);
842
                                int plen;
843
                                int spaceLeft;
844

    
845
                                if(p==NULL) p= temp, *p=0;         //last filter
846
                                else p--, *p=',';                //not last filter
847

    
848
                                plen= strlen(p);
849
                                spaceLeft= p - temp + plen;
850
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
851
                                {
852
                                        ppMode->error++;
853
                                        break;
854
                                }
855
                                memmove(p + newlen, p, plen+1);
856
                                memcpy(p, replaceTable[2*i + 1], newlen);
857
                                filterNameOk=1;
858
                        }
859
                }
860

    
861
                for(i=0; filters[i].shortName!=NULL; i++)
862
                {
863
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
864
                        if(   !strcmp(filters[i].longName, filterName)
865
                           || !strcmp(filters[i].shortName, filterName))
866
                        {
867
                                ppMode->lumMode &= ~filters[i].mask;
868
                                ppMode->chromMode &= ~filters[i].mask;
869

    
870
                                filterNameOk=1;
871
                                if(!enable) break; // user wants to disable it
872

    
873
                                if(q >= filters[i].minLumQuality)
874
                                        ppMode->lumMode|= filters[i].mask;
875
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
876
                                        if(q >= filters[i].minChromQuality)
877
                                                ppMode->chromMode|= filters[i].mask;
878

    
879
                                if(filters[i].mask == LEVEL_FIX)
880
                                {
881
                                        int o;
882
                                        ppMode->minAllowedY= 16;
883
                                        ppMode->maxAllowedY= 234;
884
                                        for(o=0; options[o]!=NULL; o++)
885
                                        {
886
                                                if(  !strcmp(options[o],"fullyrange")
887
                                                   ||!strcmp(options[o],"f"))
888
                                                {
889
                                                        ppMode->minAllowedY= 0;
890
                                                        ppMode->maxAllowedY= 255;
891
                                                        numOfUnknownOptions--;
892
                                                }
893
                                        }
894
                                }
895
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
896
                                {
897
                                        int o;
898
                                        int numOfNoises=0;
899

    
900
                                        for(o=0; options[o]!=NULL; o++)
901
                                        {
902
                                                char *tail;
903
                                                ppMode->maxTmpNoise[numOfNoises]=
904
                                                        strtol(options[o], &tail, 0);
905
                                                if(tail!=options[o])
906
                                                {
907
                                                        numOfNoises++;
908
                                                        numOfUnknownOptions--;
909
                                                        if(numOfNoises >= 3) break;
910
                                                }
911
                                        }
912
                                }
913
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK 
914
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
915
                                {
916
                                        int o;
917

    
918
                                        for(o=0; options[o]!=NULL && o<2; o++)
919
                                        {
920
                                                char *tail;
921
                                                int val= strtol(options[o], &tail, 0);
922
                                                if(tail==options[o]) break;
923

    
924
                                                numOfUnknownOptions--;
925
                                                if(o==0) ppMode->baseDcDiff= val;
926
                                                else ppMode->flatnessThreshold= val;
927
                                        }
928
                                }
929
                                else if(filters[i].mask == FORCE_QUANT)
930
                                {
931
                                        int o;
932
                                        ppMode->forcedQuant= 15;
933

    
934
                                        for(o=0; options[o]!=NULL && o<1; o++)
935
                                        {
936
                                                char *tail;
937
                                                int val= strtol(options[o], &tail, 0);
938
                                                if(tail==options[o]) break;
939

    
940
                                                numOfUnknownOptions--;
941
                                                ppMode->forcedQuant= val;
942
                                        }
943
                                }
944
                        }
945
                }
946
                if(!filterNameOk) ppMode->error++;
947
                ppMode->error += numOfUnknownOptions;
948
        }
949

    
950
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
951
        if(ppMode->error)
952
        {
953
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
954
                free(ppMode);
955
                return NULL;
956
        }
957
        return ppMode;
958
}
959

    
960
/**
 * Releases a mode obtained from pp_get_mode_by_name_and_quality().
 * Passing NULL is allowed and does nothing.
 */
void pp_free_mode(pp_mode_t *mode){
    free(mode); /* free(NULL) is a no-op */
}
963

    
964
/**
 * Replaces the buffer referenced by *p with a freshly allocated, zero filled
 * one. The previous contents are discarded, not copied.
 *
 * @param p         address of the buffer pointer; the old buffer (may be NULL)
 *                  is freed
 * @param alignment alignment passed on to memalign()
 * @param size      size of the new buffer in bytes
 *
 * On allocation failure, or if size is negative, *p is left NULL.
 */
static void reallocAlign(void **p, int alignment, int size){
        free(*p);
        *p= NULL;
        /* a negative int would be converted to a huge size_t */
        if(size < 0) return;

        *p= memalign(alignment, size);
        if(*p) memset(*p, 0, size);
}
969

    
970
/**
 * (Re)allocates all work buffers of the context and stores the strides they
 * were sized for in c->stride / c->qpStride.
 * Every buffer is replaced through reallocAlign(); afterwards each yHistogram
 * entry is set to a start value derived from width*height.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary picture buffers
 * @param qpStride line size used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        const int mbWidth = (width+15)>>4;   // picture size in 16x16 macroblocks
        const int mbHeight= (height+15)>>4;
        int bin;
        int plane;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        bin= 0;
        while(bin < 256)
        {
                c->yHistogram[bin]= width*height/64*15/256;
                bin++;
        }

        for(plane=0; plane<3; plane++)
        {
                //Note: the +17*1024 is just there so that reads/writes past the end need no special care
                reallocAlign((void **)&c->tempBlured[plane], 8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[plane], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
997

    
998
static void global_init(void){
999
        int i;
1000
        memset(clip_table, 0, 256);
1001
        for(i=256; i<512; i++)
1002
                clip_table[i]= i;
1003
        memset(clip_table+512, 0, 256);
1004
}
1005

    
1006
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
1007
        PPContext *c= memalign(32, sizeof(PPContext));
1008
        int stride= (width+15)&(~15); //assumed / will realloc if needed
1009
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
1010
        
1011
        global_init();
1012

    
1013
        memset(c, 0, sizeof(PPContext));
1014
        c->cpuCaps= cpuCaps;
1015
        if(cpuCaps&PP_FORMAT){
1016
                c->hChromaSubSample= cpuCaps&0x3;
1017
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1018
        }else{
1019
                c->hChromaSubSample= 1;
1020
                c->vChromaSubSample= 1;
1021
        }
1022

    
1023
        reallocBuffers(c, width, height, stride, qpStride);
1024
        
1025
        c->frameNum=-1;
1026

    
1027
        return c;
1028
}
1029

    
1030
void pp_free_context(void *vc){
1031
        PPContext *c = (PPContext*)vc;
1032
        int i;
1033
        
1034
        for(i=0; i<3; i++) free(c->tempBlured[i]);
1035
        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
1036
        
1037
        free(c->tempBlocks);
1038
        free(c->yHistogram);
1039
        free(c->tempDst);
1040
        free(c->tempSrc);
1041
        free(c->deintTemp);
1042
        free(c->stdQPTable);
1043
        free(c->nonBQPTable);
1044
        free(c->forcedQPTable);
1045
        
1046
        memset(c, 0, sizeof(PPContext));
1047

    
1048
        free(c);
1049
}
1050

    
1051
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1052
                 uint8_t * dst[3], int dstStride[3],
1053
                 int width, int height,
1054
                 QP_STORE_T *QP_store,  int QPStride,
1055
                 pp_mode_t *vm,  void *vc, int pict_type)
1056
{
1057
        int mbWidth = (width+15)>>4;
1058
        int mbHeight= (height+15)>>4;
1059
        PPMode *mode = (PPMode*)vm;
1060
        PPContext *c = (PPContext*)vc;
1061
        int minStride= MAX(srcStride[0], dstStride[0]);
1062

    
1063
        if(c->stride < minStride || c->qpStride < QPStride)
1064
                reallocBuffers(c, width, height, 
1065
                                MAX(minStride, c->stride), 
1066
                                MAX(c->qpStride, QPStride));
1067

    
1068
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
1069
        {
1070
                int i;
1071
                QP_store= c->forcedQPTable;
1072
                QPStride= 0;
1073
                if(mode->lumMode & FORCE_QUANT)
1074
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1075
                else
1076
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1077
        }
1078
//printf("pict_type:%d\n", pict_type);
1079

    
1080
        if(pict_type & PP_PICT_TYPE_QP2){
1081
                int i;
1082
                const int count= mbHeight * QPStride;
1083
                for(i=0; i<(count>>2); i++){
1084
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1085
                }
1086
                for(i<<=2; i<count; i++){
1087
                        c->stdQPTable[i] = QP_store[i]>>1;
1088
                }
1089
                QP_store= c->stdQPTable;
1090
        }
1091

    
1092
if(0){
1093
int x,y;
1094
for(y=0; y<mbHeight; y++){
1095
        for(x=0; x<mbWidth; x++){
1096
                printf("%2d ", QP_store[x + y*QPStride]);
1097
        }
1098
        printf("\n");
1099
}
1100
        printf("\n");
1101
}
1102

    
1103
        if((pict_type&7)!=3)
1104
        {
1105
                int i;
1106
                const int count= mbHeight * QPStride;
1107
                for(i=0; i<(count>>2); i++){
1108
                        ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1109
                }
1110
                for(i<<=2; i<count; i++){
1111
                        c->nonBQPTable[i] = QP_store[i] & 0x3F;
1112
                }
1113
        }
1114

    
1115
        if(verbose>2)
1116
        {
1117
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1118
        }
1119

    
1120
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1121
                width, height, QP_store, QPStride, 0, mode, c);
1122

    
1123
        width  = (width )>>c->hChromaSubSample;
1124
        height = (height)>>c->vChromaSubSample;
1125

    
1126
        if(mode->chromMode)
1127
        {
1128
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1129
                        width, height, QP_store, QPStride, 1, mode, c);
1130
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1131
                        width, height, QP_store, QPStride, 2, mode, c);
1132
        }
1133
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1134
        {
1135
                memcpy(dst[1], src[1], srcStride[1]*height);
1136
                memcpy(dst[2], src[2], srcStride[2]*height);
1137
        }
1138
        else
1139
        {
1140
                int y;
1141
                for(y=0; y<height; y++)
1142
                {
1143
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1144
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1145
                }
1146
        }
1147
}
1148