Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ c6a9271c

History | View | Annotate | Download (44.2 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I don't have a 3DNow CPU, so that code is untested; nobody has reported that it fails, so it seems to work
51
# more or less self-invented filters, so exactness isn't too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding, ...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use the Subversion log
75

    
76
#include "config.h"
77
#include "avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85
//#undef HAVE_MMX2
86
//#define HAVE_3DNOW
87
//#undef HAVE_MMX
88
//#undef ARCH_X86
89
//#define DEBUG_BRIGHTNESS
90
#ifdef USE_FASTMEMCPY
91
#include "libvo/fastmemcpy.h"
92
#endif
93
#include "postprocess.h"
94
#include "postprocess_internal.h"
95

    
96
#include "mangle.h" //FIXME should be supressed
97

    
98
#ifdef HAVE_ALTIVEC_H
99
#include <altivec.h>
100
#endif
101

    
102
/*
 * Small arithmetic helpers.
 * These are plain function-like macros: every argument can be evaluated more
 * than once, so never pass an expression with side effects (i++, a call, ...).
 */
/* smaller of a and b */
#define MIN(a,b) ((a) > (b) ? (b) : (a))
103
/* larger of a and b */
#define MAX(a,b) ((a) < (b) ? (b) : (a))
104
/* absolute value of a */
#define ABS(a) ((a) > 0 ? (a) : (-(a)))
105
/* +1 for strictly positive a, -1 otherwise (note: SIGN(0) is -1) */
#define SIGN(a) ((a) > 0 ? 1 : -1)
106

    
107
/* NOTE(review): not referenced in the visible part of this file; presumably sizes
   buffers used while parsing a mode string -- confirm against the users */
#define GET_MODE_BUFFER_SIZE 500
108
#define OPTIONS_ARRAY_SIZE 10
109
/* edge length (in pixels) of the blocks the filters below iterate over */
#define BLOCK_SIZE 8
110
/* NOTE(review): not referenced in the visible part of this file */
#define TEMP_STRIDE 8
111
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
112

    
113
/*
 * 8-byte aligned 64-bit constants that replicate one value across all lanes:
 *   wXX = four 16-bit words each holding 0x00XX (w05 -> 5, w04 -> 4, w20 -> 0x20)
 *   bXX = eight bytes each holding 0xXX
 * They are not referenced by the C code visible here; presumably they are used
 * from the inline assembly in the included templates, which would be why they
 * carry attribute_used -- confirm against postprocess_template.c.
 */
#if defined(ARCH_X86) || defined(ARCH_X86_64)
114
static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
115
static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
116
static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
117
static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
118
static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
119
static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
120
static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
121
static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
122
#endif
123

    
124
/* 768-byte table; it is not filled in the visible part of this file */
static uint8_t clip_table[3*256];
125
/* Points 256 bytes into clip_table, so indices -256..511 stay inside the table.
   NOTE(review): presumably a saturating lookup (value -> 0..255) -- confirm where it is initialised */
static uint8_t * const clip_tab= clip_table + 256;
126

    
127
/* compile-time constant, fixed at 0 here; not referenced in the visible part of this file */
static const int verbose= 0;
128

    
129
/* NOTE(review): presumably the threshold used by the dering filter in the templates -- confirm */
static const int attribute_used deringThreshold= 20;
130

    
131

    
132
/*
 * Table of the known filters, one struct PPFilter initializer per line,
 * terminated by an entry whose name pointers are NULL.
 * The first two strings are the short and the long filter name; the meaning of
 * the three integers and of the final mask constant is defined by struct PPFilter
 * (declared outside this file -- see postprocess_internal.h).
 * NOTE(review): the pp_help text further down spells the "ha"/"va" long names
 * "hadeblock"/"vadeblock", while this table uses "ahdeblock"/"avdeblock".
 */
static struct PPFilter filters[]=
133
{
134
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
135
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
136
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
137
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
138
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
139
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
140
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
141
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
142
        {"dr", "dering",                1, 5, 6, DERING},
143
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
144
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
145
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
146
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
147
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
148
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
149
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
150
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
151
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
152
        {NULL, NULL,0,0,0,0} //End Marker
153
};
154

    
155
/*
 * Flat list of string pairs terminated by a single NULL: each even entry is an
 * alias and the entry that follows it is the filter string it stands for
 * ("default"/"de" and "fast"/"fa" are two spellings of the same expansion).
 * NOTE(review): the code that consumes this table is not visible here; presumably
 * the mode-string parser substitutes aliases with it -- confirm.
 */
static const char *replaceTable[]=
156
{
157
        "default",      "hdeblock:a,vdeblock:a,dering:a",
158
        "de",           "hdeblock:a,vdeblock:a,dering:a",
159
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
160
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
161
        "ac",           "ha:a:128:7,va:a,dering:a",
162
        NULL //End Marker
163
};
164

    
165

    
166
#if defined(ARCH_X86) || defined(ARCH_X86_64)
167
static inline void prefetchnta(void *p)
168
{
169
        asm volatile(   "prefetchnta (%0)\n\t"
170
                : : "r" (p)
171
        );
172
}
173

    
174
static inline void prefetcht0(void *p)
175
{
176
        asm volatile(   "prefetcht0 (%0)\n\t"
177
                : : "r" (p)
178
        );
179
}
180

    
181
static inline void prefetcht1(void *p)
182
{
183
        asm volatile(   "prefetcht1 (%0)\n\t"
184
                : : "r" (p)
185
        );
186
}
187

    
188
static inline void prefetcht2(void *p)
189
{
190
        asm volatile(   "prefetcht2 (%0)\n\t"
191
                : : "r" (p)
192
        );
193
}
194
#endif
195

    
196
// The horizontal functions exist only in C because the MMX code is faster using the vertical filters plus a transpose
197

    
198
/**
199
 * Check if the given 8x8 Block is mostly "flat"
200
 */
201
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
202
{
203
        int numEq= 0;
204
        int y;
205
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206
        const int dcThreshold= dcOffset*2 + 1;
207

    
208
        for(y=0; y<BLOCK_SIZE; y++)
209
        {
210
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
211
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
212
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
213
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
214
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
215
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
216
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
217
                src+= stride;
218
        }
219
        return numEq > c->ppMode.flatnessThreshold;
220
}
221

    
222
/**
223
 * Check if the middle 8x8 Block in the given 8x16 block is flat
224
 */
225
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
226
        int numEq= 0;
227
        int y;
228
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229
        const int dcThreshold= dcOffset*2 + 1;
230

    
231
        src+= stride*4; // src points to begin of the 8x8 Block
232
        for(y=0; y<BLOCK_SIZE-1; y++)
233
        {
234
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
235
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
236
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
237
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
238
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
239
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
240
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
241
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
242
                src+= stride;
243
        }
244
        return numEq > c->ppMode.flatnessThreshold;
245
}
246

    
247
/**
 * Cheap range test for an 8x8 block: on every row one pair of pixels lying 5
 * (or 3) columns apart is sampled, and the test fails as soon as a pair differs
 * by more than 2*QP.
 * The sampled column pairs are (0,5), (2,7), (4,1), (6,3) for rows 0..3 and the
 * same pattern again for rows 4..7.
 * @param src    top-left pixel of the block
 * @param stride distance between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const int colPair[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
        int row;

        for(row=0; row<8; row++, src+= stride){
                const int lhs= src[ colPair[row & 3][0] ];
                const int rhs= src[ colPair[row & 3][1] ];

                // one unsigned compare tests -2*QP <= lhs-rhs <= 2*QP
                if((unsigned)(lhs - rhs + 2*QP) > 4*QP) return 0;
        }
        return 1;
}
269

    
270
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
271
{
272
#if 1
273
#if 1
274
        int x;
275
        src+= stride*4;
276
        for(x=0; x<BLOCK_SIZE; x+=4)
277
        {
278
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
279
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
280
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
281
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
282
        }
283
#else
284
        int x;
285
        src+= stride*3;
286
        for(x=0; x<BLOCK_SIZE; x++)
287
        {
288
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
289
        }
290
#endif
291
        return 1;
292
#else
293
        int x;
294
        src+= stride*4;
295
        for(x=0; x<BLOCK_SIZE; x++)
296
        {
297
                int min=255;
298
                int max=0;
299
                int y;
300
                for(y=0; y<8; y++){
301
                        int v= src[x + y*stride];
302
                        if(v>max) max=v;
303
                        if(v<min) min=v;
304
                }
305
                if(max-min > 2*QP) return 0;
306
        }
307
        return 1;
308
#endif
309
}
310

    
311
/**
 * Classify a block for horizontal filtering.
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it for c->QP,
 *         0 if it is flat but fails the range test
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
321

    
322
/**
 * Classify a block for vertical filtering.
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it for c->QP,
 *         0 if it is flat but fails the range test
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
332

    
333
/**
 * Default horizontal deblocking filter (C version).
 * For each of the BLOCK_SIZE rows the 8 pixels dst[0..7] are examined and, if the
 * step between dst[3] and dst[4] is small enough (relative to c->QP), the two
 * pixels are pulled towards each other. The correction never exceeds half of
 * their difference and never changes its sign.
 * @param dst    first of the 8 pixels of the first row (modified in place)
 * @param stride distance between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int row;

        for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
        {
                const int mid= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                const int absMid= ABS(mid);
                int left, right, limit, delta;

                if(absMid >= 8*c->QP) continue; // edge too strong, leave the row alone

                limit= (dst[3] - dst[4])/2;
                left=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                right= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                delta= absMid - MIN( ABS(left), ABS(right) );
                if(delta < 0) delta= 0;

                delta= (5*delta + 32) >> 6;
                if(mid >= 0) delta= -delta; // correction has the opposite sign of mid (mid==0 counts as positive)

                // clamp the correction between 0 and limit
                if(limit > 0)
                {
                        if(delta < 0)     delta= 0;
                        if(delta > limit) delta= limit;
                }
                else
                {
                        if(delta > 0)     delta= 0;
                        if(delta < limit) delta= limit;
                }

                dst[3]-= delta;
                dst[4]+= delta;
        }
}
369

    
370
/**
371
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
372
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
373
 */
374
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
375
{
376
        int y;
377
        for(y=0; y<BLOCK_SIZE; y++)
378
        {
379
                const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
380
                const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
381

    
382
                int sums[10];
383
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
384
                sums[1] = sums[0] - first  + dst[3];
385
                sums[2] = sums[1] - first  + dst[4];
386
                sums[3] = sums[2] - first  + dst[5];
387
                sums[4] = sums[3] - first  + dst[6];
388
                sums[5] = sums[4] - dst[0] + dst[7];
389
                sums[6] = sums[5] - dst[1] + last;
390
                sums[7] = sums[6] - dst[2] + last;
391
                sums[8] = sums[7] - dst[3] + last;
392
                sums[9] = sums[8] - dst[4] + last;
393

    
394
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
395
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
396
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
397
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
398
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
399
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
400
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
401
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
402

    
403
                dst+= stride;
404
        }
405
}
406

    
407
/**
408
 * Experimental Filter 1 (Horizontal)
409
 * will not damage linear gradients
410
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
411
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
412
 * MMX2 version does correct clipping C version doesnt
413
 * not identical with the vertical one
414
 */
415
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
416
{
417
        int y;
418
        static uint64_t *lut= NULL;
419
        if(lut==NULL)
420
        {
421
                int i;
422
                lut = av_malloc(256*8);
423
                for(i=0; i<256; i++)
424
                {
425
                        int v= i < 128 ? 2*i : 2*(i-256);
426
/*
427
//Simulate 112242211 9-Tap filter
428
                        uint64_t a= (v/16) & 0xFF;
429
                        uint64_t b= (v/8) & 0xFF;
430
                        uint64_t c= (v/4) & 0xFF;
431
                        uint64_t d= (3*v/8) & 0xFF;
432
*/
433
//Simulate piecewise linear interpolation
434
                        uint64_t a= (v/16) & 0xFF;
435
                        uint64_t b= (v*3/16) & 0xFF;
436
                        uint64_t c= (v*5/16) & 0xFF;
437
                        uint64_t d= (7*v/16) & 0xFF;
438
                        uint64_t A= (0x100 - a)&0xFF;
439
                        uint64_t B= (0x100 - b)&0xFF;
440
                        uint64_t C= (0x100 - c)&0xFF;
441
                        uint64_t D= (0x100 - c)&0xFF;
442

    
443
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
444
                                (D<<24) | (C<<16) | (B<<8) | (A);
445
                        //lut[i] = (v<<32) | (v<<24);
446
                }
447
        }
448

    
449
        for(y=0; y<BLOCK_SIZE; y++)
450
        {
451
                int a= src[1] - src[2];
452
                int b= src[3] - src[4];
453
                int c= src[5] - src[6];
454

    
455
                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
456

    
457
                if(d < QP)
458
                {
459
                        int v = d * SIGN(-b);
460

    
461
                        src[1] +=v/8;
462
                        src[2] +=v/4;
463
                        src[3] +=3*v/8;
464
                        src[4] -=3*v/8;
465
                        src[5] -=v/4;
466
                        src[6] -=v/8;
467

    
468
                }
469
                src+=stride;
470
        }
471
}
472

    
473
/**
474
 * accurate deblock filter
475
 */
476
static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
477
        int y;
478
        const int QP= c->QP;
479
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
480
        const int dcThreshold= dcOffset*2 + 1;
481
//START_TIMER
482
        src+= step*4; // src points to begin of the 8x8 Block
483
        for(y=0; y<8; y++){
484
                int numEq= 0;
485

    
486
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
487
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
488
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
489
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
490
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
491
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
492
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
493
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
494
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
495
                if(numEq > c->ppMode.flatnessThreshold){
496
                        int min, max, x;
497

    
498
                        if(src[0] > src[step]){
499
                            max= src[0];
500
                            min= src[step];
501
                        }else{
502
                            max= src[step];
503
                            min= src[0];
504
                        }
505
                        for(x=2; x<8; x+=2){
506
                                if(src[x*step] > src[(x+1)*step]){
507
                                        if(src[x    *step] > max) max= src[ x   *step];
508
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
509
                                }else{
510
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
511
                                        if(src[ x   *step] < min) min= src[ x   *step];
512
                                }
513
                        }
514
                        if(max-min < 2*QP){
515
                                const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
516
                                const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
517

    
518
                                int sums[10];
519
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
520
                                sums[1] = sums[0] - first       + src[3*step];
521
                                sums[2] = sums[1] - first       + src[4*step];
522
                                sums[3] = sums[2] - first       + src[5*step];
523
                                sums[4] = sums[3] - first       + src[6*step];
524
                                sums[5] = sums[4] - src[0*step] + src[7*step];
525
                                sums[6] = sums[5] - src[1*step] + last;
526
                                sums[7] = sums[6] - src[2*step] + last;
527
                                sums[8] = sums[7] - src[3*step] + last;
528
                                sums[9] = sums[8] - src[4*step] + last;
529

    
530
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
531
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
532
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
533
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
534
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
535
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
536
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
537
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
538
                        }
539
                }else{
540
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
541

    
542
                        if(ABS(middleEnergy) < 8*QP)
543
                        {
544
                                const int q=(src[3*step] - src[4*step])/2;
545
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
546
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
547

    
548
                                int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
549
                                d= MAX(d, 0);
550

    
551
                                d= (5*d + 32) >> 6;
552
                                d*= SIGN(-middleEnergy);
553

    
554
                                if(q>0)
555
                                {
556
                                        d= d<0 ? 0 : d;
557
                                        d= d>q ? q : d;
558
                                }
559
                                else
560
                                {
561
                                        d= d>0 ? 0 : d;
562
                                        d= d<q ? q : d;
563
                                }
564

    
565
                                src[3*step]-= d;
566
                                src[4*step]+= d;
567
                        }
568
                }
569

    
570
                src += stride;
571
        }
572
/*if(step==16){
573
    STOP_TIMER("step16")
574
}else{
575
    STOP_TIMER("stepX")
576
}*/
577
}
578

    
579
//Note: we have C, MMX, MMX2 and 3DNow versions; there is no combined 3DNow+MMX2 one
580
//Plain C versions
581
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
582
#define COMPILE_C
583
#endif
584

    
585
#ifdef ARCH_POWERPC
586
#ifdef HAVE_ALTIVEC
587
#define COMPILE_ALTIVEC
588
#endif //HAVE_ALTIVEC
589
#endif //ARCH_POWERPC
590

    
591
#if defined(ARCH_X86) || defined(ARCH_X86_64)
592

    
593
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
594
#define COMPILE_MMX
595
#endif
596

    
597
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
598
#define COMPILE_MMX2
599
#endif
600

    
601
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
602
#define COMPILE_3DNOW
603
#endif
604
#endif //ARCH_X86
605

    
606
#undef HAVE_MMX
607
#undef HAVE_MMX2
608
#undef HAVE_3DNOW
609
#undef HAVE_ALTIVEC
610

    
611
#ifdef COMPILE_C
612
#undef HAVE_MMX
613
#undef HAVE_MMX2
614
#undef HAVE_3DNOW
615
#define RENAME(a) a ## _C
616
#include "postprocess_template.c"
617
#endif
618

    
619
#ifdef ARCH_POWERPC
620
#ifdef COMPILE_ALTIVEC
621
#undef RENAME
622
#define HAVE_ALTIVEC
623
#define RENAME(a) a ## _altivec
624
#include "postprocess_altivec_template.c"
625
#include "postprocess_template.c"
626
#endif
627
#endif //ARCH_POWERPC
628

    
629
//MMX versions
630
#ifdef COMPILE_MMX
631
#undef RENAME
632
#define HAVE_MMX
633
#undef HAVE_MMX2
634
#undef HAVE_3DNOW
635
#define RENAME(a) a ## _MMX
636
#include "postprocess_template.c"
637
#endif
638

    
639
//MMX2 versions
640
#ifdef COMPILE_MMX2
641
#undef RENAME
642
#define HAVE_MMX
643
#define HAVE_MMX2
644
#undef HAVE_3DNOW
645
#define RENAME(a) a ## _MMX2
646
#include "postprocess_template.c"
647
#endif
648

    
649
//3DNOW versions
650
#ifdef COMPILE_3DNOW
651
#undef RENAME
652
#define HAVE_MMX
653
#undef HAVE_MMX2
654
#define HAVE_3DNOW
655
#define RENAME(a) a ## _3DNow
656
#include "postprocess_template.c"
657
#endif
658

    
659
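// Minor note: the HAVE_xyz macros were undefined/redefined for the template instantiations above, so from this line on they no longer match config.h; avoid relying on them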
660

    
661
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
662
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
663
{
664
        PPContext *c= (PPContext *)vc;
665
        PPMode *ppMode= (PPMode *)vm;
666
        c->ppMode= *ppMode; //FIXME
667

    
668
        // useing ifs here as they are faster than function pointers allthough the
669
        // difference wouldnt be messureable here but its much better because
670
        // someone might exchange the cpu whithout restarting mplayer ;)
671
#ifdef RUNTIME_CPUDETECT
672
#if defined(ARCH_X86) || defined(ARCH_X86_64)
673
        // ordered per speed fasterst first
674
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
675
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
677
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
679
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680
        else
681
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682
#else
683
#ifdef ARCH_POWERPC
684
#ifdef HAVE_ALTIVEC
685
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
686
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
        else
688
#endif
689
#endif
690
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691
#endif
692
#else //RUNTIME_CPUDETECT
693
#ifdef HAVE_MMX2
694
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
695
#elif defined (HAVE_3DNOW)
696
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
697
#elif defined (HAVE_MMX)
698
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
699
#elif defined (HAVE_ALTIVEC)
700
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
701
#else
702
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
703
#endif
704
#endif //!RUNTIME_CPUDETECT
705
}
706

    
707
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
708
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
709

    
710
/**
 * Help text for the -pp command line option.
 * Lists the available filters (short name, long name, options), then the
 * syntax of a mode string and a few examples.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
755

    
756
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
757
{
758
        char temp[GET_MODE_BUFFER_SIZE];
759
        char *p= temp;
760
        const char *filterDelimiters= ",/";
761
        const char *optionDelimiters= ":";
762
        struct PPMode *ppMode;
763
        char *filterToken;
764

    
765
        ppMode= av_malloc(sizeof(PPMode));
766

    
767
        ppMode->lumMode= 0;
768
        ppMode->chromMode= 0;
769
        ppMode->maxTmpNoise[0]= 700;
770
        ppMode->maxTmpNoise[1]= 1500;
771
        ppMode->maxTmpNoise[2]= 3000;
772
        ppMode->maxAllowedY= 234;
773
        ppMode->minAllowedY= 16;
774
        ppMode->baseDcDiff= 256/8;
775
        ppMode->flatnessThreshold= 56-16-1;
776
        ppMode->maxClippedThreshold= 0.01;
777
        ppMode->error=0;
778

    
779
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
780

    
781
        if(verbose>1) printf("pp: %s\n", name);
782

    
783
        for(;;){
784
                char *filterName;
785
                int q= 1000000; //PP_QUALITY_MAX;
786
                int chrom=-1;
787
                int luma=-1;
788
                char *option;
789
                char *options[OPTIONS_ARRAY_SIZE];
790
                int i;
791
                int filterNameOk=0;
792
                int numOfUnknownOptions=0;
793
                int enable=1; //does the user want us to enabled or disabled the filter
794

    
795
                filterToken= strtok(p, filterDelimiters);
796
                if(filterToken == NULL) break;
797
                p+= strlen(filterToken) + 1; // p points to next filterToken
798
                filterName= strtok(filterToken, optionDelimiters);
799
                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
800

    
801
                if(*filterName == '-')
802
                {
803
                        enable=0;
804
                        filterName++;
805
                }
806

    
807
                for(;;){ //for all options
808
                        option= strtok(NULL, optionDelimiters);
809
                        if(option == NULL) break;
810

    
811
                        if(verbose>1) printf("pp: option: %s\n", option);
812
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
813
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
814
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
815
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
816
                        else
817
                        {
818
                                options[numOfUnknownOptions] = option;
819
                                numOfUnknownOptions++;
820
                        }
821
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
822
                }
823
                options[numOfUnknownOptions] = NULL;
824

    
825
                /* replace stuff from the replace Table */
826
                for(i=0; replaceTable[2*i]!=NULL; i++)
827
                {
828
                        if(!strcmp(replaceTable[2*i], filterName))
829
                        {
830
                                int newlen= strlen(replaceTable[2*i + 1]);
831
                                int plen;
832
                                int spaceLeft;
833

    
834
                                if(p==NULL) p= temp, *p=0;      //last filter
835
                                else p--, *p=',';               //not last filter
836

    
837
                                plen= strlen(p);
838
                                spaceLeft= p - temp + plen;
839
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
840
                                {
841
                                        ppMode->error++;
842
                                        break;
843
                                }
844
                                memmove(p + newlen, p, plen+1);
845
                                memcpy(p, replaceTable[2*i + 1], newlen);
846
                                filterNameOk=1;
847
                        }
848
                }
849

    
850
                for(i=0; filters[i].shortName!=NULL; i++)
851
                {
852
//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
853
                        if(   !strcmp(filters[i].longName, filterName)
854
                           || !strcmp(filters[i].shortName, filterName))
855
                        {
856
                                ppMode->lumMode &= ~filters[i].mask;
857
                                ppMode->chromMode &= ~filters[i].mask;
858

    
859
                                filterNameOk=1;
860
                                if(!enable) break; // user wants to disable it
861

    
862
                                if(q >= filters[i].minLumQuality && luma)
863
                                        ppMode->lumMode|= filters[i].mask;
864
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
865
                                        if(q >= filters[i].minChromQuality)
866
                                                ppMode->chromMode|= filters[i].mask;
867

    
868
                                if(filters[i].mask == LEVEL_FIX)
869
                                {
870
                                        int o;
871
                                        ppMode->minAllowedY= 16;
872
                                        ppMode->maxAllowedY= 234;
873
                                        for(o=0; options[o]!=NULL; o++)
874
                                        {
875
                                                if(  !strcmp(options[o],"fullyrange")
876
                                                   ||!strcmp(options[o],"f"))
877
                                                {
878
                                                        ppMode->minAllowedY= 0;
879
                                                        ppMode->maxAllowedY= 255;
880
                                                        numOfUnknownOptions--;
881
                                                }
882
                                        }
883
                                }
884
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
885
                                {
886
                                        int o;
887
                                        int numOfNoises=0;
888

    
889
                                        for(o=0; options[o]!=NULL; o++)
890
                                        {
891
                                                char *tail;
892
                                                ppMode->maxTmpNoise[numOfNoises]=
893
                                                        strtol(options[o], &tail, 0);
894
                                                if(tail!=options[o])
895
                                                {
896
                                                        numOfNoises++;
897
                                                        numOfUnknownOptions--;
898
                                                        if(numOfNoises >= 3) break;
899
                                                }
900
                                        }
901
                                }
902
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
903
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
904
                                {
905
                                        int o;
906

    
907
                                        for(o=0; options[o]!=NULL && o<2; o++)
908
                                        {
909
                                                char *tail;
910
                                                int val= strtol(options[o], &tail, 0);
911
                                                if(tail==options[o]) break;
912

    
913
                                                numOfUnknownOptions--;
914
                                                if(o==0) ppMode->baseDcDiff= val;
915
                                                else ppMode->flatnessThreshold= val;
916
                                        }
917
                                }
918
                                else if(filters[i].mask == FORCE_QUANT)
919
                                {
920
                                        int o;
921
                                        ppMode->forcedQuant= 15;
922

    
923
                                        for(o=0; options[o]!=NULL && o<1; o++)
924
                                        {
925
                                                char *tail;
926
                                                int val= strtol(options[o], &tail, 0);
927
                                                if(tail==options[o]) break;
928

    
929
                                                numOfUnknownOptions--;
930
                                                ppMode->forcedQuant= val;
931
                                        }
932
                                }
933
                        }
934
                }
935
                if(!filterNameOk) ppMode->error++;
936
                ppMode->error += numOfUnknownOptions;
937
        }
938

    
939
        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
940
        if(ppMode->error)
941
        {
942
                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
943
                av_free(ppMode);
944
                return NULL;
945
        }
946
        return ppMode;
947
}
948

    
949
/**
 * Releases a mode by handing it to av_free().
 *
 * @param mode mode to release
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}
952

    
953
/**
 * Frees the buffer *p currently points to and stores a new buffer of
 * size bytes, obtained from av_mallocz(), in *p. The old contents are not
 * preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted but not used by this implementation
 * @param size      size of the new buffer in bytes
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
957

    
958
/**
 * (Re)allocates all work buffers of a context for the given picture size and
 * strides, and records the strides in the context. Previous buffer contents
 * are discarded.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line stride the temporary picture buffers are sized for
 * @param qpStride stride the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
    /* picture size in 16x16 macroblocks, rounded up */
    const int mbWidth  = (width  + 15) >> 4;
    const int mbHeight = (height + 15) >> 4;
    /* start value written to every histogram bin */
    const int histogramInit = width*height/64*15/256;
    int bin;
    int plane;

    c->stride   = stride;
    c->qpStride = qpStride;

    reallocAlign((void **)&c->tempDst,    8, stride*24);
    reallocAlign((void **)&c->tempSrc,    8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

    for (bin = 0; bin < 256; bin++) {
        c->yHistogram[bin] = histogramInit;
    }

    for (plane = 0; plane < 3; plane++) {
        /* Note: the +17*1024 is only there so that reads/writes slightly
           past the end of the buffer do not have to be worried about */
        reallocAlign((void **)&c->tempBlured[plane], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBluredPast[plane], 8, 256*((height+7)&(~7))/2 + 17*1024); //FIXME size
    }

    reallocAlign((void **)&c->deintTemp,     8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable,   8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable,    8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
985

    
986
static void global_init(void){
987
        int i;
988
        memset(clip_table, 0, 256);
989
        for(i=256; i<512; i++)
990
                clip_table[i]= i;
991
        memset(clip_table+512, 0, 256);
992
}
993

    
994
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
995
        PPContext *c= av_malloc(sizeof(PPContext));
996
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
997
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
998

    
999
        global_init();
1000

    
1001
        memset(c, 0, sizeof(PPContext));
1002
        c->cpuCaps= cpuCaps;
1003
        if(cpuCaps&PP_FORMAT){
1004
                c->hChromaSubSample= cpuCaps&0x3;
1005
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1006
        }else{
1007
                c->hChromaSubSample= 1;
1008
                c->vChromaSubSample= 1;
1009
        }
1010

    
1011
        reallocBuffers(c, width, height, stride, qpStride);
1012

    
1013
        c->frameNum=-1;
1014

    
1015
        return c;
1016
}
1017

    
1018
void pp_free_context(void *vc){
1019
        PPContext *c = (PPContext*)vc;
1020
        int i;
1021

    
1022
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1023
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1024

    
1025
        av_free(c->tempBlocks);
1026
        av_free(c->yHistogram);
1027
        av_free(c->tempDst);
1028
        av_free(c->tempSrc);
1029
        av_free(c->deintTemp);
1030
        av_free(c->stdQPTable);
1031
        av_free(c->nonBQPTable);
1032
        av_free(c->forcedQPTable);
1033

    
1034
        memset(c, 0, sizeof(PPContext));
1035

    
1036
        av_free(c);
1037
}
1038

    
1039
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1040
                 uint8_t * dst[3], int dstStride[3],
1041
                 int width, int height,
1042
                 QP_STORE_T *QP_store,  int QPStride,
1043
                 pp_mode_t *vm,  void *vc, int pict_type)
1044
{
1045
        int mbWidth = (width+15)>>4;
1046
        int mbHeight= (height+15)>>4;
1047
        PPMode *mode = (PPMode*)vm;
1048
        PPContext *c = (PPContext*)vc;
1049
        int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
1050
        int absQPStride = ABS(QPStride);
1051

    
1052
        // c->stride and c->QPStride are always positive
1053
        if(c->stride < minStride || c->qpStride < absQPStride)
1054
                reallocBuffers(c, width, height,
1055
                                MAX(minStride, c->stride),
1056
                                MAX(c->qpStride, absQPStride));
1057

    
1058
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1059
        {
1060
                int i;
1061
                QP_store= c->forcedQPTable;
1062
                absQPStride = QPStride = 0;
1063
                if(mode->lumMode & FORCE_QUANT)
1064
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1065
                else
1066
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1067
        }
1068
//printf("pict_type:%d\n", pict_type);
1069

    
1070
        if(pict_type & PP_PICT_TYPE_QP2){
1071
                int i;
1072
                const int count= mbHeight * absQPStride;
1073
                for(i=0; i<(count>>2); i++){
1074
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1075
                }
1076
                for(i<<=2; i<count; i++){
1077
                        c->stdQPTable[i] = QP_store[i]>>1;
1078
                }
1079
                QP_store= c->stdQPTable;
1080
                QPStride= absQPStride;
1081
        }
1082

    
1083
if(0){
1084
int x,y;
1085
for(y=0; y<mbHeight; y++){
1086
        for(x=0; x<mbWidth; x++){
1087
                printf("%2d ", QP_store[x + y*QPStride]);
1088
        }
1089
        printf("\n");
1090
}
1091
        printf("\n");
1092
}
1093

    
1094
        if((pict_type&7)!=3)
1095
        {
1096
                if (QPStride >= 0) {
1097
                        int i;
1098
                        const int count= mbHeight * QPStride;
1099
                        for(i=0; i<(count>>2); i++){
1100
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1101
                        }
1102
                        for(i<<=2; i<count; i++){
1103
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1104
                        }
1105
                } else {
1106
                        int i,j;
1107
                        for(i=0; i<mbHeight; i++) {
1108
                                    for(j=0; j<absQPStride; j++) {
1109
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1110
                                }
1111
                        }
1112
                }
1113
        }
1114

    
1115
        if(verbose>2)
1116
        {
1117
                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
1118
        }
1119

    
1120
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1121
                width, height, QP_store, QPStride, 0, mode, c);
1122

    
1123
        width  = (width )>>c->hChromaSubSample;
1124
        height = (height)>>c->vChromaSubSample;
1125

    
1126
        if(mode->chromMode)
1127
        {
1128
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1129
                        width, height, QP_store, QPStride, 1, mode, c);
1130
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1131
                        width, height, QP_store, QPStride, 2, mode, c);
1132
        }
1133
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1134
        {
1135
                linecpy(dst[1], src[1], height, srcStride[1]);
1136
                linecpy(dst[2], src[2], height, srcStride[2]);
1137
        }
1138
        else
1139
        {
1140
                int y;
1141
                for(y=0; y<height; y++)
1142
                {
1143
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1144
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1145
                }
1146
        }
1147
}
1148