Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ a8988916

History | View | Annotate | Download (44 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less self-invented filters, so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding, ...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use the Subversion log
75

    
76
#include "config.h"
77
#include "avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85
//#undef HAVE_MMX2
86
//#define HAVE_3DNOW
87
//#undef HAVE_MMX
88
//#undef ARCH_X86
89
//#define DEBUG_BRIGHTNESS
90
#include "postprocess.h"
91
#include "postprocess_internal.h"
92

    
93
#ifdef HAVE_ALTIVEC_H
94
#include <altivec.h>
95
#endif
96

    
97
#define GET_MODE_BUFFER_SIZE 500
98
#define OPTIONS_ARRAY_SIZE 10
99
#define BLOCK_SIZE 8
100
#define TEMP_STRIDE 8
101
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
102

    
103
#if defined(ARCH_X86)
/*
 * 8-byte aligned 64-bit constants for the x86 code paths.
 * wNN: four 16-bit lanes, each holding 0x00NN.
 * bNN: eight bytes, each holding 0xNN.
 * They are tagged attribute_used, presumably because they are only referenced
 * from inline assembly in the included templates -- confirm before removing.
 * The ULL suffix is used because 0x8080808080808080 does not fit a signed
 * long long; the stored values are unchanged.
 */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080ULL;
#endif
113

    
114
static uint8_t clip_table[3*256];
115
static uint8_t * const clip_tab= clip_table + 256;
116

    
117
static const int attribute_used deringThreshold= 20;
118

    
119

    
120
static struct PPFilter filters[]=
121
{
122
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
123
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
124
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
125
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
126
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
127
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
128
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
129
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
130
        {"dr", "dering",                1, 5, 6, DERING},
131
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
132
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
133
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
134
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
135
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
136
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
137
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
138
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
139
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
140
        {NULL, NULL,0,0,0,0} //End Marker
141
};
142

    
143
/*
 * Alias table stored as consecutive (name, expansion) string pairs and
 * terminated by a single NULL. Even indices hold the alias, odd indices the
 * filter string it stands for. Both the strings and the pointer array are
 * read-only.
 */
static const char * const replaceTable[]=
{
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL //End Marker
};
152

    
153

    
154
#if defined(ARCH_X86)
/*
 * Thin wrappers that each emit the x86 prefetch instruction of the same name
 * for the address p. The pointer is only used as an address operand, so it is
 * taken as const void *. __asm__ is used instead of asm so the code also
 * builds in strict ISO C modes.
 */
static inline void prefetchnta(const void *p)
{
        __asm__ volatile(   "prefetchnta (%0)\n\t"
                : : "r" (p)
        );
}

static inline void prefetcht0(const void *p)
{
        __asm__ volatile(   "prefetcht0 (%0)\n\t"
                : : "r" (p)
        );
}

static inline void prefetcht1(const void *p)
{
        __asm__ volatile(   "prefetcht1 (%0)\n\t"
                : : "r" (p)
        );
}

static inline void prefetcht2(const void *p)
{
        __asm__ volatile(   "prefetcht2 (%0)\n\t"
                : : "r" (p)
        );
}
#endif
183

    
184
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
185

    
186
/**
187
 * Check if the given 8x8 Block is mostly "flat"
188
 */
189
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
190
{
191
        int numEq= 0;
192
        int y;
193
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194
        const int dcThreshold= dcOffset*2 + 1;
195

    
196
        for(y=0; y<BLOCK_SIZE; y++)
197
        {
198
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
199
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
200
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
201
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
202
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
203
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
204
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
205
                src+= stride;
206
        }
207
        return numEq > c->ppMode.flatnessThreshold;
208
}
209

    
210
/**
211
 * Check if the middle 8x8 Block in the given 8x16 block is flat
212
 */
213
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
214
        int numEq= 0;
215
        int y;
216
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217
        const int dcThreshold= dcOffset*2 + 1;
218

    
219
        src+= stride*4; // src points to begin of the 8x8 Block
220
        for(y=0; y<BLOCK_SIZE-1; y++)
221
        {
222
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
223
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
224
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
227
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
228
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
229
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
230
                src+= stride;
231
        }
232
        return numEq > c->ppMode.flatnessThreshold;
233
}
234

    
235
/**
 * Cheap approximation of a per-row "max - min is small" test on an 8x8 block.
 *
 * Only one pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3) over the 8 rows. The block is rejected as soon
 * as one sampled difference lies outside [-2*QP, 2*QP].
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantiser controlling the tolerated difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const int pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
        const unsigned limit= 4*QP;
        int row;

        for(row=0; row<8; row++)
        {
                const int *p= pairs[row & 3];
                // the unsigned cast turns the two-sided range test into a single compare
                if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > limit) return 0;
                src += stride;
        }
        return 1;
}
257

    
258
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
259
{
260
#if 1
261
#if 1
262
        int x;
263
        src+= stride*4;
264
        for(x=0; x<BLOCK_SIZE; x+=4)
265
        {
266
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
267
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
268
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
269
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
270
        }
271
#else
272
        int x;
273
        src+= stride*3;
274
        for(x=0; x<BLOCK_SIZE; x++)
275
        {
276
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
277
        }
278
#endif
279
        return 1;
280
#else
281
        int x;
282
        src+= stride*4;
283
        for(x=0; x<BLOCK_SIZE; x++)
284
        {
285
                int min=255;
286
                int max=0;
287
                int y;
288
                for(y=0; y<8; y++){
289
                        int v= src[x + y*stride];
290
                        if(v>max) max=v;
291
                        if(v<min) min=v;
292
                }
293
                if(max-min > 2*QP) return 0;
294
        }
295
        return 1;
296
#endif
297
}
298

    
299
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat;
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it for c->QP;
 *         0 if it is flat but isHorizMinMaxOk_C() rejects it
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
309

    
310
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat;
 *         1 if it is flat and isVertMinMaxOk_C() accepts it for c->QP;
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
320

    
321
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows, only the two pixels on either side of the
 * edge between dst[3] and dst[4] are modified. A row is left untouched when
 * the magnitude of its "middle energy" is not below 8*c->QP.
 *
 * @param dst    first pixel of the 8-pixel-wide row segment; modified in place
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        int y;
        for(y=0; y<BLOCK_SIZE; y++, dst+= stride)
        {
                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
                int q, leftEnergy, rightEnergy, d;

                if(FFABS(middleEnergy) >= 8*c->QP)
                        continue;

                q= (dst[3] - dst[4])/2;
                leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
                rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

                // correction strength: how much the edge stands out against its neighbourhood
                d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);

                d= (5*d + 32) >> 6;
                d*= FFSIGN(-middleEnergy);

                // clamp d between 0 and q (q may be negative) so the step is never overshot
                if(q>0)
                        d= FFMIN(FFMAX(d, 0), q);
                else
                        d= FFMAX(FFMIN(d, 0), q);

                dst[3]-= d;
                dst[4]+= d;
        }
}
357

    
358
/**
359
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
360
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
361
 */
362
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
363
{
364
        int y;
365
        for(y=0; y<BLOCK_SIZE; y++)
366
        {
367
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
368
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
369

    
370
                int sums[10];
371
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
372
                sums[1] = sums[0] - first  + dst[3];
373
                sums[2] = sums[1] - first  + dst[4];
374
                sums[3] = sums[2] - first  + dst[5];
375
                sums[4] = sums[3] - first  + dst[6];
376
                sums[5] = sums[4] - dst[0] + dst[7];
377
                sums[6] = sums[5] - dst[1] + last;
378
                sums[7] = sums[6] - dst[2] + last;
379
                sums[8] = sums[7] - dst[3] + last;
380
                sums[9] = sums[8] - dst[4] + last;
381

    
382
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
383
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
384
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
385
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
386
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
387
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
388
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
389
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
390

    
391
                dst+= stride;
392
        }
393
}
394

    
395
/**
396
 * Experimental Filter 1 (Horizontal)
397
 * will not damage linear gradients
398
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
399
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
400
 * MMX2 version does correct clipping C version does not
401
 * not identical with the vertical one
402
 */
403
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
404
{
405
        int y;
406
        static uint64_t *lut= NULL;
407
        if(lut==NULL)
408
        {
409
                int i;
410
                lut = av_malloc(256*8);
411
                for(i=0; i<256; i++)
412
                {
413
                        int v= i < 128 ? 2*i : 2*(i-256);
414
/*
415
//Simulate 112242211 9-Tap filter
416
                        uint64_t a= (v/16) & 0xFF;
417
                        uint64_t b= (v/8) & 0xFF;
418
                        uint64_t c= (v/4) & 0xFF;
419
                        uint64_t d= (3*v/8) & 0xFF;
420
*/
421
//Simulate piecewise linear interpolation
422
                        uint64_t a= (v/16) & 0xFF;
423
                        uint64_t b= (v*3/16) & 0xFF;
424
                        uint64_t c= (v*5/16) & 0xFF;
425
                        uint64_t d= (7*v/16) & 0xFF;
426
                        uint64_t A= (0x100 - a)&0xFF;
427
                        uint64_t B= (0x100 - b)&0xFF;
428
                        uint64_t C= (0x100 - c)&0xFF;
429
                        uint64_t D= (0x100 - c)&0xFF;
430

    
431
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
432
                                (D<<24) | (C<<16) | (B<<8) | (A);
433
                        //lut[i] = (v<<32) | (v<<24);
434
                }
435
        }
436

    
437
        for(y=0; y<BLOCK_SIZE; y++)
438
        {
439
                int a= src[1] - src[2];
440
                int b= src[3] - src[4];
441
                int c= src[5] - src[6];
442

    
443
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
444

    
445
                if(d < QP)
446
                {
447
                        int v = d * FFSIGN(-b);
448

    
449
                        src[1] +=v/8;
450
                        src[2] +=v/4;
451
                        src[3] +=3*v/8;
452
                        src[4] -=3*v/8;
453
                        src[5] -=v/4;
454
                        src[6] -=v/8;
455

    
456
                }
457
                src+=stride;
458
        }
459
}
460

    
461
/**
462
 * accurate deblock filter
463
 */
464
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
465
        int y;
466
        const int QP= c->QP;
467
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
468
        const int dcThreshold= dcOffset*2 + 1;
469
//START_TIMER
470
        src+= step*4; // src points to begin of the 8x8 Block
471
        for(y=0; y<8; y++){
472
                int numEq= 0;
473

    
474
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
475
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
476
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
477
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
478
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
479
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
480
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
481
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
482
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
483
                if(numEq > c->ppMode.flatnessThreshold){
484
                        int min, max, x;
485

    
486
                        if(src[0] > src[step]){
487
                            max= src[0];
488
                            min= src[step];
489
                        }else{
490
                            max= src[step];
491
                            min= src[0];
492
                        }
493
                        for(x=2; x<8; x+=2){
494
                                if(src[x*step] > src[(x+1)*step]){
495
                                        if(src[x    *step] > max) max= src[ x   *step];
496
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
497
                                }else{
498
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
499
                                        if(src[ x   *step] < min) min= src[ x   *step];
500
                                }
501
                        }
502
                        if(max-min < 2*QP){
503
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
504
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
505

    
506
                                int sums[10];
507
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
508
                                sums[1] = sums[0] - first       + src[3*step];
509
                                sums[2] = sums[1] - first       + src[4*step];
510
                                sums[3] = sums[2] - first       + src[5*step];
511
                                sums[4] = sums[3] - first       + src[6*step];
512
                                sums[5] = sums[4] - src[0*step] + src[7*step];
513
                                sums[6] = sums[5] - src[1*step] + last;
514
                                sums[7] = sums[6] - src[2*step] + last;
515
                                sums[8] = sums[7] - src[3*step] + last;
516
                                sums[9] = sums[8] - src[4*step] + last;
517

    
518
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
519
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
520
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
521
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
522
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
523
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
524
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
525
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
526
                        }
527
                }else{
528
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
529

    
530
                        if(FFABS(middleEnergy) < 8*QP)
531
                        {
532
                                const int q=(src[3*step] - src[4*step])/2;
533
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
534
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
535

    
536
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
537
                                d= FFMAX(d, 0);
538

    
539
                                d= (5*d + 32) >> 6;
540
                                d*= FFSIGN(-middleEnergy);
541

    
542
                                if(q>0)
543
                                {
544
                                        d= d<0 ? 0 : d;
545
                                        d= d>q ? q : d;
546
                                }
547
                                else
548
                                {
549
                                        d= d>0 ? 0 : d;
550
                                        d= d<q ? q : d;
551
                                }
552

    
553
                                src[3*step]-= d;
554
                                src[4*step]+= d;
555
                        }
556
                }
557

    
558
                src += stride;
559
        }
560
/*if(step==16){
561
    STOP_TIMER("step16")
562
}else{
563
    STOP_TIMER("stepX")
564
}*/
565
}
566

    
567
// Note: there are C, MMX, MMX2 and 3DNow versions; there is no 3DNow+MMX2 one.
//
// postprocess_template.c is included once per selected variant. Before each
// inclusion RENAME() is set to append the variant suffix to an identifier, and
// the HAVE_* macros are set to describe that variant only.

// Decide which variants get compiled.
// Plain C versions
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef ARCH_POWERPC
#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC
#endif //ARCH_POWERPC

#if defined(ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

// From here on the configured HAVE_* values are gone; each section below
// redefines them for the variant it instantiates.
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef ARCH_POWERPC
#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif
#endif //ARCH_POWERPC

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif

// Minor note: after this point the HAVE_xyz macros reflect whichever variant
// section was processed last, not the build configuration, so do not rely on
// them for anything else.
648

    
649
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
651
{
652
        PPContext *c= (PPContext *)vc;
653
        PPMode *ppMode= (PPMode *)vm;
654
        c->ppMode= *ppMode; //FIXME
655

    
656
        // Using ifs here as they are faster than function pointers although the
657
        // difference would not be measureable here but it is much better because
658
        // someone might exchange the CPU whithout restarting MPlayer ;)
659
#ifdef RUNTIME_CPUDETECT
660
#if defined(ARCH_X86)
661
        // ordered per speed fasterst first
662
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668
        else
669
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670
#else
671
#ifdef ARCH_POWERPC
672
#ifdef HAVE_ALTIVEC
673
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
674
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675
        else
676
#endif
677
#endif
678
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
679
#endif
680
#else //RUNTIME_CPUDETECT
681
#ifdef HAVE_MMX2
682
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683
#elif defined (HAVE_3DNOW)
684
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
#elif defined (HAVE_MMX)
686
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
#elif defined (HAVE_ALTIVEC)
688
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
#else
690
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691
#endif
692
#endif //!RUNTIME_CPUDETECT
693
}
694

    
695
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
696
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
697

    
698
/**
 * Help text for the -pp command line option.
 * Lists the available filters with their short/long names and options,
 * followed by the filter-string syntax and a few examples.
 */
char *pp_help =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
743

    
744
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
745
{
746
        char temp[GET_MODE_BUFFER_SIZE];
747
        char *p= temp;
748
        static const char filterDelimiters[] = ",/";
749
        static const char optionDelimiters[] = ":";
750
        struct PPMode *ppMode;
751
        char *filterToken;
752

    
753
        ppMode= av_malloc(sizeof(PPMode));
754

    
755
        ppMode->lumMode= 0;
756
        ppMode->chromMode= 0;
757
        ppMode->maxTmpNoise[0]= 700;
758
        ppMode->maxTmpNoise[1]= 1500;
759
        ppMode->maxTmpNoise[2]= 3000;
760
        ppMode->maxAllowedY= 234;
761
        ppMode->minAllowedY= 16;
762
        ppMode->baseDcDiff= 256/8;
763
        ppMode->flatnessThreshold= 56-16-1;
764
        ppMode->maxClippedThreshold= 0.01;
765
        ppMode->error=0;
766

    
767
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
768

    
769
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
770

    
771
        for(;;){
772
                char *filterName;
773
                int q= 1000000; //PP_QUALITY_MAX;
774
                int chrom=-1;
775
                int luma=-1;
776
                char *option;
777
                char *options[OPTIONS_ARRAY_SIZE];
778
                int i;
779
                int filterNameOk=0;
780
                int numOfUnknownOptions=0;
781
                int enable=1; //does the user want us to enabled or disabled the filter
782

    
783
                filterToken= strtok(p, filterDelimiters);
784
                if(filterToken == NULL) break;
785
                p+= strlen(filterToken) + 1; // p points to next filterToken
786
                filterName= strtok(filterToken, optionDelimiters);
787
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
788

    
789
                if(*filterName == '-')
790
                {
791
                        enable=0;
792
                        filterName++;
793
                }
794

    
795
                for(;;){ //for all options
796
                        option= strtok(NULL, optionDelimiters);
797
                        if(option == NULL) break;
798

    
799
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
800
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
801
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
802
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
803
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
804
                        else
805
                        {
806
                                options[numOfUnknownOptions] = option;
807
                                numOfUnknownOptions++;
808
                        }
809
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810
                }
811
                options[numOfUnknownOptions] = NULL;
812

    
813
                /* replace stuff from the replace Table */
814
                for(i=0; replaceTable[2*i]!=NULL; i++)
815
                {
816
                        if(!strcmp(replaceTable[2*i], filterName))
817
                        {
818
                                int newlen= strlen(replaceTable[2*i + 1]);
819
                                int plen;
820
                                int spaceLeft;
821

    
822
                                if(p==NULL) p= temp, *p=0;      //last filter
823
                                else p--, *p=',';               //not last filter
824

    
825
                                plen= strlen(p);
826
                                spaceLeft= p - temp + plen;
827
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
828
                                {
829
                                        ppMode->error++;
830
                                        break;
831
                                }
832
                                memmove(p + newlen, p, plen+1);
833
                                memcpy(p, replaceTable[2*i + 1], newlen);
834
                                filterNameOk=1;
835
                        }
836
                }
837

    
838
                for(i=0; filters[i].shortName!=NULL; i++)
839
                {
840
                        if(   !strcmp(filters[i].longName, filterName)
841
                           || !strcmp(filters[i].shortName, filterName))
842
                        {
843
                                ppMode->lumMode &= ~filters[i].mask;
844
                                ppMode->chromMode &= ~filters[i].mask;
845

    
846
                                filterNameOk=1;
847
                                if(!enable) break; // user wants to disable it
848

    
849
                                if(q >= filters[i].minLumQuality && luma)
850
                                        ppMode->lumMode|= filters[i].mask;
851
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
852
                                        if(q >= filters[i].minChromQuality)
853
                                                ppMode->chromMode|= filters[i].mask;
854

    
855
                                if(filters[i].mask == LEVEL_FIX)
856
                                {
857
                                        int o;
858
                                        ppMode->minAllowedY= 16;
859
                                        ppMode->maxAllowedY= 234;
860
                                        for(o=0; options[o]!=NULL; o++)
861
                                        {
862
                                                if(  !strcmp(options[o],"fullyrange")
863
                                                   ||!strcmp(options[o],"f"))
864
                                                {
865
                                                        ppMode->minAllowedY= 0;
866
                                                        ppMode->maxAllowedY= 255;
867
                                                        numOfUnknownOptions--;
868
                                                }
869
                                        }
870
                                }
871
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
872
                                {
873
                                        int o;
874
                                        int numOfNoises=0;
875

    
876
                                        for(o=0; options[o]!=NULL; o++)
877
                                        {
878
                                                char *tail;
879
                                                ppMode->maxTmpNoise[numOfNoises]=
880
                                                        strtol(options[o], &tail, 0);
881
                                                if(tail!=options[o])
882
                                                {
883
                                                        numOfNoises++;
884
                                                        numOfUnknownOptions--;
885
                                                        if(numOfNoises >= 3) break;
886
                                                }
887
                                        }
888
                                }
889
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
890
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
891
                                {
892
                                        int o;
893

    
894
                                        for(o=0; options[o]!=NULL && o<2; o++)
895
                                        {
896
                                                char *tail;
897
                                                int val= strtol(options[o], &tail, 0);
898
                                                if(tail==options[o]) break;
899

    
900
                                                numOfUnknownOptions--;
901
                                                if(o==0) ppMode->baseDcDiff= val;
902
                                                else ppMode->flatnessThreshold= val;
903
                                        }
904
                                }
905
                                else if(filters[i].mask == FORCE_QUANT)
906
                                {
907
                                        int o;
908
                                        ppMode->forcedQuant= 15;
909

    
910
                                        for(o=0; options[o]!=NULL && o<1; o++)
911
                                        {
912
                                                char *tail;
913
                                                int val= strtol(options[o], &tail, 0);
914
                                                if(tail==options[o]) break;
915

    
916
                                                numOfUnknownOptions--;
917
                                                ppMode->forcedQuant= val;
918
                                        }
919
                                }
920
                        }
921
                }
922
                if(!filterNameOk) ppMode->error++;
923
                ppMode->error += numOfUnknownOptions;
924
        }
925

    
926
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
927
        if(ppMode->error)
928
        {
929
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
930
                av_free(ppMode);
931
                return NULL;
932
        }
933
        return ppMode;
934
}
935

    
936
/**
 * Releases a mode by passing it to av_free().
 *
 * @param mode mode to release
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}
939

    
940
/**
 * Replaces the buffer *p by a fresh zero-filled one.
 * The old buffer is released first; its contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes passed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
        void *fresh;

        av_free(*p);
        fresh= av_mallocz(size);
        *p= fresh;
}
944

    
945
/**
 * (Re)allocates all work buffers of a context for the given dimensions.
 * Every buffer is released and replaced by a zeroed one, so previous
 * contents are lost. The new strides are stored in the context.
 *
 * Allocation failures are not reported; a failed buffer is left NULL.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the pixel buffers
 * @param qpStride line size used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
        int mbWidth = (width+15)>>4;
        int mbHeight= (height+15)>>4;
        int i;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
        /* Give every histogram bin the same start value; skip this if the
           allocation above failed instead of writing through NULL. */
        if(c->yHistogram != NULL)
        {
                const int initialCount= width*height/64*15/256;
                for(i=0; i<256; i++)
                        c->yHistogram[i]= initialCount;
        }

        for(i=0; i<3; i++)
        {
                //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
                reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
972

    
973
static void global_init(void){
974
        int i;
975
        memset(clip_table, 0, 256);
976
        for(i=256; i<512; i++)
977
                clip_table[i]= i;
978
        memset(clip_table+512, 0, 256);
979
}
980

    
981
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    (void)ptr; /* every context reports the same name */
    return "postproc";
}
984

    
985
/* Class descriptor that pp_get_context() stores in PPContext.av_class.
 * Initialized positionally with the string "Postproc", the context_to_name
 * callback and NULL.
 * NOTE(review): presumably class name, item-name callback and option table,
 * in that order -- confirm against the AVClass definition. */
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
986

    
987
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
988
        PPContext *c= av_malloc(sizeof(PPContext));
989
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
990
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
991

    
992
        global_init();
993

    
994
        memset(c, 0, sizeof(PPContext));
995
        c->av_class = &av_codec_context_class;
996
        c->cpuCaps= cpuCaps;
997
        if(cpuCaps&PP_FORMAT){
998
                c->hChromaSubSample= cpuCaps&0x3;
999
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1000
        }else{
1001
                c->hChromaSubSample= 1;
1002
                c->vChromaSubSample= 1;
1003
        }
1004

    
1005
        reallocBuffers(c, width, height, stride, qpStride);
1006

    
1007
        c->frameNum=-1;
1008

    
1009
        return c;
1010
}
1011

    
1012
void pp_free_context(void *vc){
1013
        PPContext *c = (PPContext*)vc;
1014
        int i;
1015

    
1016
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1017
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1018

    
1019
        av_free(c->tempBlocks);
1020
        av_free(c->yHistogram);
1021
        av_free(c->tempDst);
1022
        av_free(c->tempSrc);
1023
        av_free(c->deintTemp);
1024
        av_free(c->stdQPTable);
1025
        av_free(c->nonBQPTable);
1026
        av_free(c->forcedQPTable);
1027

    
1028
        memset(c, 0, sizeof(PPContext));
1029

    
1030
        av_free(c);
1031
}
1032

    
1033
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1034
                 uint8_t * dst[3], int dstStride[3],
1035
                 int width, int height,
1036
                 QP_STORE_T *QP_store,  int QPStride,
1037
                 pp_mode_t *vm,  void *vc, int pict_type)
1038
{
1039
        int mbWidth = (width+15)>>4;
1040
        int mbHeight= (height+15)>>4;
1041
        PPMode *mode = (PPMode*)vm;
1042
        PPContext *c = (PPContext*)vc;
1043
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1044
        int absQPStride = FFABS(QPStride);
1045

    
1046
        // c->stride and c->QPStride are always positive
1047
        if(c->stride < minStride || c->qpStride < absQPStride)
1048
                reallocBuffers(c, width, height,
1049
                                FFMAX(minStride, c->stride),
1050
                                FFMAX(c->qpStride, absQPStride));
1051

    
1052
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1053
        {
1054
                int i;
1055
                QP_store= c->forcedQPTable;
1056
                absQPStride = QPStride = 0;
1057
                if(mode->lumMode & FORCE_QUANT)
1058
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1059
                else
1060
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1061
        }
1062

    
1063
        if(pict_type & PP_PICT_TYPE_QP2){
1064
                int i;
1065
                const int count= mbHeight * absQPStride;
1066
                for(i=0; i<(count>>2); i++){
1067
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1068
                }
1069
                for(i<<=2; i<count; i++){
1070
                        c->stdQPTable[i] = QP_store[i]>>1;
1071
                }
1072
                QP_store= c->stdQPTable;
1073
                QPStride= absQPStride;
1074
        }
1075

    
1076
if(0){
1077
int x,y;
1078
for(y=0; y<mbHeight; y++){
1079
        for(x=0; x<mbWidth; x++){
1080
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1081
        }
1082
        av_log(c, AV_LOG_INFO, "\n");
1083
}
1084
        av_log(c, AV_LOG_INFO, "\n");
1085
}
1086

    
1087
        if((pict_type&7)!=3)
1088
        {
1089
                if (QPStride >= 0) {
1090
                        int i;
1091
                        const int count= mbHeight * QPStride;
1092
                        for(i=0; i<(count>>2); i++){
1093
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1094
                        }
1095
                        for(i<<=2; i<count; i++){
1096
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1097
                        }
1098
                } else {
1099
                        int i,j;
1100
                        for(i=0; i<mbHeight; i++) {
1101
                                    for(j=0; j<absQPStride; j++) {
1102
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1103
                                }
1104
                        }
1105
                }
1106
        }
1107

    
1108
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1109
               mode->lumMode, mode->chromMode);
1110

    
1111
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1112
                width, height, QP_store, QPStride, 0, mode, c);
1113

    
1114
        width  = (width )>>c->hChromaSubSample;
1115
        height = (height)>>c->vChromaSubSample;
1116

    
1117
        if(mode->chromMode)
1118
        {
1119
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1120
                        width, height, QP_store, QPStride, 1, mode, c);
1121
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1122
                        width, height, QP_store, QPStride, 2, mode, c);
1123
        }
1124
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1125
        {
1126
                linecpy(dst[1], src[1], height, srcStride[1]);
1127
                linecpy(dst[2], src[2], height, srcStride[2]);
1128
        }
1129
        else
1130
        {
1131
                int y;
1132
                for(y=0; y<height; y++)
1133
                {
1134
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1135
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1136
                }
1137
        }
1138
}
1139