Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ 755bfeab

History | View | Annotate | Download (44 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* i do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use the Subversion log
75

    
76
#include "config.h"
77
#include "avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85
//#undef HAVE_MMX2
86
//#define HAVE_3DNOW
87
//#undef HAVE_MMX
88
//#undef ARCH_X86
89
//#define DEBUG_BRIGHTNESS
90
#include "postprocess.h"
91
#include "postprocess_internal.h"
92

    
93
#include "mangle.h" //FIXME should be supressed
94

    
95
#ifdef HAVE_ALTIVEC_H
96
#include <altivec.h>
97
#endif
98

    
99
#define GET_MODE_BUFFER_SIZE 500
100
#define OPTIONS_ARRAY_SIZE 10
101
#define BLOCK_SIZE 8
102
#define TEMP_STRIDE 8
103
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
104

    
105
#ifdef ARCH_X86
/*
 * 64-bit constants declared through DECLARE_ALIGNED(8, ...).
 * wNN repeats the 16-bit value 0x00NN in all four words,
 * bNN repeats the byte 0xNN in all eight bytes.
 * Nothing in this part of the file reads them; attribute_used suggests they
 * are referenced from code the compiler cannot see (presumably the inline asm
 * of the included templates -- confirm there).
 */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05) = 0x0005000500050005ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04) = 0x0004000400040004ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20) = 0x0020002000200020ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00) = 0x0000000000000000ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01) = 0x0101010101010101ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02) = 0x0202020202020202ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08) = 0x0808080808080808ULL;
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80) = 0x8080808080808080ULL;
#endif /* ARCH_X86 */
115

    
116
/*
 * clip_tab points 256 entries into clip_table, so clip_tab[-256] .. clip_tab[511]
 * all stay inside the array. The table contents are not set up in this part
 * of the file.
 */
static uint8_t clip_table[256 * 3];
static uint8_t * const clip_tab = &clip_table[256];
118

    
119
/* Not referenced in this part of the file; attribute_used keeps it from being discarded. */
static const int attribute_used deringThreshold = 20;
120

    
121

    
122
static struct PPFilter filters[]=
123
{
124
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
125
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
126
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
127
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
128
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
129
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
130
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
131
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
132
        {"dr", "dering",                1, 5, 6, DERING},
133
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
134
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
135
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
136
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
137
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
138
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
139
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
140
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
141
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
142
        {NULL, NULL,0,0,0,0} //End Marker
143
};
144

    
145
/*
 * String pairs followed by a single NULL end marker: entry 2*i is a preset
 * name, entry 2*i+1 the filter list it stands for (presumably substituted for
 * the name while a mode string is parsed -- the parser is not visible here).
 */
static const char *replaceTable[] = {
        /* name */      /* expansion */
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",
        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
        "ac",           "ha:a:128:7,va:a,dering:a",
        NULL            /* end marker */
};
154

    
155

    
156
#if defined(ARCH_X86)
157
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
        __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
163

    
164
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
        __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
170

    
171
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
        __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
177

    
178
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
        __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
184
#endif
185

    
186
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
187

    
188
/**
189
 * Check if the given 8x8 Block is mostly "flat"
190
 */
191
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
192
{
193
        int numEq= 0;
194
        int y;
195
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
196
        const int dcThreshold= dcOffset*2 + 1;
197

    
198
        for(y=0; y<BLOCK_SIZE; y++)
199
        {
200
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
201
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
202
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
203
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
204
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
205
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
206
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
207
                src+= stride;
208
        }
209
        return numEq > c->ppMode.flatnessThreshold;
210
}
211

    
212
/**
213
 * Check if the middle 8x8 Block in the given 8x16 block is flat
214
 */
215
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
216
        int numEq= 0;
217
        int y;
218
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
219
        const int dcThreshold= dcOffset*2 + 1;
220

    
221
        src+= stride*4; // src points to begin of the 8x8 Block
222
        for(y=0; y<BLOCK_SIZE-1; y++)
223
        {
224
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
227
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
228
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
229
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
230
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
231
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
232
                src+= stride;
233
        }
234
        return numEq > c->ppMode.flatnessThreshold;
235
}
236

    
237
/**
 * Sparse range check on 8 rows: one pixel pair per row is compared and the
 * check fails as soon as a pair differs by more than 2*QP in either direction.
 * The sampled column pairs are (0,5), (2,7), (4,1), (6,3) on rows 0..3 and the
 * same again on rows 4..7.
 * @param src    first pixel of the first row
 * @param stride distance between the starts of consecutive rows
 * @return 1 if every sampled pair passes, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const uint8_t colPair[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
        const unsigned limit= 4*QP;
        const uint8_t *row= src;
        int line;

        for(line=0; line<8; line++, row+= stride)
        {
                const uint8_t *pair= colPair[line & 3];

                // unsigned compare: a difference below -2*QP wraps to a huge value
                if((unsigned)(row[pair[0]] - row[pair[1]] + 2*QP) > limit)
                        return 0;
        }
        return 1;
}
259

    
260
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
261
{
262
#if 1
263
#if 1
264
        int x;
265
        src+= stride*4;
266
        for(x=0; x<BLOCK_SIZE; x+=4)
267
        {
268
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
269
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
270
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
271
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
272
        }
273
#else
274
        int x;
275
        src+= stride*3;
276
        for(x=0; x<BLOCK_SIZE; x++)
277
        {
278
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
279
        }
280
#endif
281
        return 1;
282
#else
283
        int x;
284
        src+= stride*4;
285
        for(x=0; x<BLOCK_SIZE; x++)
286
        {
287
                int min=255;
288
                int max=0;
289
                int y;
290
                for(y=0; y<8; y++){
291
                        int v= src[x + y*stride];
292
                        if(v>max) max=v;
293
                        if(v<min) min=v;
294
                }
295
                if(max-min > 2*QP) return 0;
296
        }
297
        return 1;
298
#endif
299
}
300

    
301
/**
 * Classify a block for horizontal filtering.
 * @return 2 if isHorizDC_C() reports the block as not flat; otherwise
 *         1 if isHorizMinMaxOk_C() passes with c->QP and 0 if it does not
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
311

    
312
/**
 * Classify a block for vertical filtering.
 * @return 2 if isVertDC_C() reports the block as not flat; otherwise
 *         1 if isVertMinMaxOk_C() passes with c->QP and 0 if it does not
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
322

    
323
/**
 * Default horizontal deblocking step on BLOCK_SIZE rows.
 * On each row only dst[3] and dst[4] may change: a correction d derived from
 * dst[0..7] is subtracted from dst[3] and added to dst[4]. Rows whose
 * |middleEnergy| is not below 8*c->QP are left untouched.
 * @param dst    first pixel of the first row
 * @param stride distance between the starts of consecutive rows
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        uint8_t *row= dst;
        int line;

        for(line=0; line<BLOCK_SIZE; line++, row+= stride)
        {
                const int middleEnergy= 5*(row[4] - row[3]) + 2*(row[2] - row[5]);
                int leftEnergy, rightEnergy, limit, d;

                if(FFABS(middleEnergy) >= 8*c->QP)
                        continue;

                limit=       (row[3] - row[4])/2;
                leftEnergy=  5*(row[2] - row[1]) + 2*(row[0] - row[3]);
                rightEnergy= 5*(row[6] - row[5]) + 2*(row[4] - row[7]);

                d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                d= FFMAX(d, 0);
                d= (5*d + 32) >> 6;

                // the correction gets the opposite sign of middleEnergy
                if(middleEnergy >= 0)
                        d= -d;

                // keep d between 0 and half of the step across the 3|4 boundary
                if(limit > 0)
                        d= FFMIN(FFMAX(d, 0), limit);
                else
                        d= FFMAX(FFMIN(d, 0), limit);

                row[3]-= d;
                row[4]+= d;
        }
}
359

    
360
/**
361
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
362
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
363
 */
364
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
365
{
366
        int y;
367
        for(y=0; y<BLOCK_SIZE; y++)
368
        {
369
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
370
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
371

    
372
                int sums[10];
373
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
374
                sums[1] = sums[0] - first  + dst[3];
375
                sums[2] = sums[1] - first  + dst[4];
376
                sums[3] = sums[2] - first  + dst[5];
377
                sums[4] = sums[3] - first  + dst[6];
378
                sums[5] = sums[4] - dst[0] + dst[7];
379
                sums[6] = sums[5] - dst[1] + last;
380
                sums[7] = sums[6] - dst[2] + last;
381
                sums[8] = sums[7] - dst[3] + last;
382
                sums[9] = sums[8] - dst[4] + last;
383

    
384
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
385
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
386
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
387
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
388
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
389
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
390
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
391
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
392

    
393
                dst+= stride;
394
        }
395
}
396

    
397
/**
398
 * Experimental Filter 1 (Horizontal)
399
 * will not damage linear gradients
400
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
401
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
402
 * MMX2 version does correct clipping C version does not
403
 * not identical with the vertical one
404
 */
405
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
406
{
407
        int y;
408
        static uint64_t *lut= NULL;
409
        if(lut==NULL)
410
        {
411
                int i;
412
                lut = av_malloc(256*8);
413
                for(i=0; i<256; i++)
414
                {
415
                        int v= i < 128 ? 2*i : 2*(i-256);
416
/*
417
//Simulate 112242211 9-Tap filter
418
                        uint64_t a= (v/16) & 0xFF;
419
                        uint64_t b= (v/8) & 0xFF;
420
                        uint64_t c= (v/4) & 0xFF;
421
                        uint64_t d= (3*v/8) & 0xFF;
422
*/
423
//Simulate piecewise linear interpolation
424
                        uint64_t a= (v/16) & 0xFF;
425
                        uint64_t b= (v*3/16) & 0xFF;
426
                        uint64_t c= (v*5/16) & 0xFF;
427
                        uint64_t d= (7*v/16) & 0xFF;
428
                        uint64_t A= (0x100 - a)&0xFF;
429
                        uint64_t B= (0x100 - b)&0xFF;
430
                        uint64_t C= (0x100 - c)&0xFF;
431
                        uint64_t D= (0x100 - c)&0xFF;
432

    
433
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
434
                                (D<<24) | (C<<16) | (B<<8) | (A);
435
                        //lut[i] = (v<<32) | (v<<24);
436
                }
437
        }
438

    
439
        for(y=0; y<BLOCK_SIZE; y++)
440
        {
441
                int a= src[1] - src[2];
442
                int b= src[3] - src[4];
443
                int c= src[5] - src[6];
444

    
445
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
446

    
447
                if(d < QP)
448
                {
449
                        int v = d * FFSIGN(-b);
450

    
451
                        src[1] +=v/8;
452
                        src[2] +=v/4;
453
                        src[3] +=3*v/8;
454
                        src[4] -=3*v/8;
455
                        src[5] -=v/4;
456
                        src[6] -=v/8;
457

    
458
                }
459
                src+=stride;
460
        }
461
}
462

    
463
/**
464
 * accurate deblock filter
465
 */
466
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
467
        int y;
468
        const int QP= c->QP;
469
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
470
        const int dcThreshold= dcOffset*2 + 1;
471
//START_TIMER
472
        src+= step*4; // src points to begin of the 8x8 Block
473
        for(y=0; y<8; y++){
474
                int numEq= 0;
475

    
476
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
477
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
478
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
479
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
480
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
481
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
482
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
483
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
484
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
485
                if(numEq > c->ppMode.flatnessThreshold){
486
                        int min, max, x;
487

    
488
                        if(src[0] > src[step]){
489
                            max= src[0];
490
                            min= src[step];
491
                        }else{
492
                            max= src[step];
493
                            min= src[0];
494
                        }
495
                        for(x=2; x<8; x+=2){
496
                                if(src[x*step] > src[(x+1)*step]){
497
                                        if(src[x    *step] > max) max= src[ x   *step];
498
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
499
                                }else{
500
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
501
                                        if(src[ x   *step] < min) min= src[ x   *step];
502
                                }
503
                        }
504
                        if(max-min < 2*QP){
505
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
506
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
507

    
508
                                int sums[10];
509
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
510
                                sums[1] = sums[0] - first       + src[3*step];
511
                                sums[2] = sums[1] - first       + src[4*step];
512
                                sums[3] = sums[2] - first       + src[5*step];
513
                                sums[4] = sums[3] - first       + src[6*step];
514
                                sums[5] = sums[4] - src[0*step] + src[7*step];
515
                                sums[6] = sums[5] - src[1*step] + last;
516
                                sums[7] = sums[6] - src[2*step] + last;
517
                                sums[8] = sums[7] - src[3*step] + last;
518
                                sums[9] = sums[8] - src[4*step] + last;
519

    
520
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
521
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
522
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
523
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
524
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
525
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
526
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
527
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
528
                        }
529
                }else{
530
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
531

    
532
                        if(FFABS(middleEnergy) < 8*QP)
533
                        {
534
                                const int q=(src[3*step] - src[4*step])/2;
535
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
537

    
538
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
539
                                d= FFMAX(d, 0);
540

    
541
                                d= (5*d + 32) >> 6;
542
                                d*= FFSIGN(-middleEnergy);
543

    
544
                                if(q>0)
545
                                {
546
                                        d= d<0 ? 0 : d;
547
                                        d= d>q ? q : d;
548
                                }
549
                                else
550
                                {
551
                                        d= d>0 ? 0 : d;
552
                                        d= d<q ? q : d;
553
                                }
554

    
555
                                src[3*step]-= d;
556
                                src[4*step]+= d;
557
                        }
558
                }
559

    
560
                src += stride;
561
        }
562
/*if(step==16){
563
    STOP_TIMER("step16")
564
}else{
565
    STOP_TIMER("stepX")
566
}*/
567
}
568

    
569
// Select which variants of the templates get compiled (COMPILE_xyz).
// Note: we have C, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one.

// Plain C version: whenever HAVE_MMX is not set or the CPU is detected at runtime
#if defined(RUNTIME_CPUDETECT) || !defined(HAVE_MMX)
#define COMPILE_C
#endif

#if defined(ARCH_POWERPC) && defined(HAVE_ALTIVEC)
#define COMPILE_ALTIVEC
#endif

#ifdef ARCH_X86

// plain MMX only if neither 3DNow nor MMX2 is configured
#if defined(RUNTIME_CPUDETECT) || (defined(HAVE_MMX) && !defined(HAVE_3DNOW) && !defined(HAVE_MMX2))
#define COMPILE_MMX
#endif

#if defined(RUNTIME_CPUDETECT) || defined(HAVE_MMX2)
#define COMPILE_MMX2
#endif

// MMX2 takes precedence over 3DNow when both are configured
#if defined(RUNTIME_CPUDETECT) || (defined(HAVE_3DNOW) && !defined(HAVE_MMX2))
#define COMPILE_3DNOW
#endif

#endif /* ARCH_X86 */
595

    
596
// Instantiate the templates once per selected variant. Before each #include
// the HAVE_xyz macros are set to describe that variant and RENAME() is set to
// append the variant's suffix to its argument.

// start with every HAVE_xyz cleared
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

//Plain C version (all HAVE_xyz are still undefined at this point)
#ifdef COMPILE_C
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

//AltiVec version
#if defined(ARCH_POWERPC) && defined(COMPILE_ALTIVEC)
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX version
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 version
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW version
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
648

    
649
// minor note: the HAVE_xyz is messed up after that line so do not use it.
650

    
651
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
652
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
653
{
654
        PPContext *c= (PPContext *)vc;
655
        PPMode *ppMode= (PPMode *)vm;
656
        c->ppMode= *ppMode; //FIXME
657

    
658
        // Using ifs here as they are faster than function pointers although the
659
        // difference would not be measureable here but it is much better because
660
        // someone might exchange the CPU whithout restarting MPlayer ;)
661
#ifdef RUNTIME_CPUDETECT
662
#if defined(ARCH_X86)
663
        // ordered per speed fasterst first
664
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
665
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
667
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
669
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670
        else
671
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
672
#else
673
#ifdef ARCH_POWERPC
674
#ifdef HAVE_ALTIVEC
675
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
676
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677
        else
678
#endif
679
#endif
680
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
681
#endif
682
#else //RUNTIME_CPUDETECT
683
#ifdef HAVE_MMX2
684
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
#elif defined (HAVE_3DNOW)
686
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
#elif defined (HAVE_MMX)
688
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
#elif defined (HAVE_ALTIVEC)
690
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
691
#else
692
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
693
#endif
694
#endif //!RUNTIME_CPUDETECT
695
}
696

    
697
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
698
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
699

    
700
/**
 * Help text for the -pp command line option: lists the filter names, their
 * short forms and options, and shows the syntax of the filter string.
 */
char *pp_help =
    "Available postprocessing filters:\n"
    "Filters                        Options\n"
    "short  long name       short   long option     Description\n"
    "*      *               a       autoq           CPU power dependent enabler\n"
    "                       c       chrom           chrominance filtering enabled\n"
    "                       y       nochrom         chrominance filtering disabled\n"
    "                       n       noluma          luma filtering disabled\n"
    "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
    "       1. difference factor: default=32, higher -> more deblocking\n"
    "       2. flatness threshold: default=39, lower -> more deblocking\n"
    "                       the h & v deblocking filters share these\n"
    "                       so you can't set different thresholds for h / v\n"
    "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
    "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
    "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
    "h1     x1hdeblock                              experimental h deblock filter 1\n"
    "v1     x1vdeblock                              experimental v deblock filter 1\n"
    "dr     dering                                  deringing filter\n"
    "al     autolevels                              automatic brightness / contrast\n"
    "                       f        fullyrange     stretch luminance to (0..255)\n"
    "lb     linblenddeint                           linear blend deinterlacer\n"
    "li     linipoldeint                            linear interpolating deinterlace\n"
    "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
    "md     mediandeint                             median deinterlacer\n"
    "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
    "l5     lowpass5                                FIR lowpass deinterlacer\n"
    "de     default                                 hb:a,vb:a,dr:a\n"
    "fa     fast                                    h1:a,v1:a,dr:a\n"
    "ac                                             ha:a:128:7,va:a,dr:a\n"
    "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
    "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
    "fq     forceQuant      <quantizer>             force quantizer\n"
    "Usage:\n"
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb                                   de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    "\n"
    ;
745

    
746
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
747
{
748
        char temp[GET_MODE_BUFFER_SIZE];
749
        char *p= temp;
750
        const char *filterDelimiters= ",/";
751
        const char *optionDelimiters= ":";
752
        struct PPMode *ppMode;
753
        char *filterToken;
754

    
755
        ppMode= av_malloc(sizeof(PPMode));
756

    
757
        ppMode->lumMode= 0;
758
        ppMode->chromMode= 0;
759
        ppMode->maxTmpNoise[0]= 700;
760
        ppMode->maxTmpNoise[1]= 1500;
761
        ppMode->maxTmpNoise[2]= 3000;
762
        ppMode->maxAllowedY= 234;
763
        ppMode->minAllowedY= 16;
764
        ppMode->baseDcDiff= 256/8;
765
        ppMode->flatnessThreshold= 56-16-1;
766
        ppMode->maxClippedThreshold= 0.01;
767
        ppMode->error=0;
768

    
769
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
770

    
771
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772

    
773
        for(;;){
774
                char *filterName;
775
                int q= 1000000; //PP_QUALITY_MAX;
776
                int chrom=-1;
777
                int luma=-1;
778
                char *option;
779
                char *options[OPTIONS_ARRAY_SIZE];
780
                int i;
781
                int filterNameOk=0;
782
                int numOfUnknownOptions=0;
783
                int enable=1; //does the user want us to enabled or disabled the filter
784

    
785
                filterToken= strtok(p, filterDelimiters);
786
                if(filterToken == NULL) break;
787
                p+= strlen(filterToken) + 1; // p points to next filterToken
788
                filterName= strtok(filterToken, optionDelimiters);
789
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
790

    
791
                if(*filterName == '-')
792
                {
793
                        enable=0;
794
                        filterName++;
795
                }
796

    
797
                for(;;){ //for all options
798
                        option= strtok(NULL, optionDelimiters);
799
                        if(option == NULL) break;
800

    
801
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
802
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
803
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
804
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
805
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
806
                        else
807
                        {
808
                                options[numOfUnknownOptions] = option;
809
                                numOfUnknownOptions++;
810
                        }
811
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
812
                }
813
                options[numOfUnknownOptions] = NULL;
814

    
815
                /* replace stuff from the replace Table */
816
                for(i=0; replaceTable[2*i]!=NULL; i++)
817
                {
818
                        if(!strcmp(replaceTable[2*i], filterName))
819
                        {
820
                                int newlen= strlen(replaceTable[2*i + 1]);
821
                                int plen;
822
                                int spaceLeft;
823

    
824
                                if(p==NULL) p= temp, *p=0;      //last filter
825
                                else p--, *p=',';               //not last filter
826

    
827
                                plen= strlen(p);
828
                                spaceLeft= p - temp + plen;
829
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
830
                                {
831
                                        ppMode->error++;
832
                                        break;
833
                                }
834
                                memmove(p + newlen, p, plen+1);
835
                                memcpy(p, replaceTable[2*i + 1], newlen);
836
                                filterNameOk=1;
837
                        }
838
                }
839

    
840
                for(i=0; filters[i].shortName!=NULL; i++)
841
                {
842
                        if(   !strcmp(filters[i].longName, filterName)
843
                           || !strcmp(filters[i].shortName, filterName))
844
                        {
845
                                ppMode->lumMode &= ~filters[i].mask;
846
                                ppMode->chromMode &= ~filters[i].mask;
847

    
848
                                filterNameOk=1;
849
                                if(!enable) break; // user wants to disable it
850

    
851
                                if(q >= filters[i].minLumQuality && luma)
852
                                        ppMode->lumMode|= filters[i].mask;
853
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
854
                                        if(q >= filters[i].minChromQuality)
855
                                                ppMode->chromMode|= filters[i].mask;
856

    
857
                                if(filters[i].mask == LEVEL_FIX)
858
                                {
859
                                        int o;
860
                                        ppMode->minAllowedY= 16;
861
                                        ppMode->maxAllowedY= 234;
862
                                        for(o=0; options[o]!=NULL; o++)
863
                                        {
864
                                                if(  !strcmp(options[o],"fullyrange")
865
                                                   ||!strcmp(options[o],"f"))
866
                                                {
867
                                                        ppMode->minAllowedY= 0;
868
                                                        ppMode->maxAllowedY= 255;
869
                                                        numOfUnknownOptions--;
870
                                                }
871
                                        }
872
                                }
873
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
874
                                {
875
                                        int o;
876
                                        int numOfNoises=0;
877

    
878
                                        for(o=0; options[o]!=NULL; o++)
879
                                        {
880
                                                char *tail;
881
                                                ppMode->maxTmpNoise[numOfNoises]=
882
                                                        strtol(options[o], &tail, 0);
883
                                                if(tail!=options[o])
884
                                                {
885
                                                        numOfNoises++;
886
                                                        numOfUnknownOptions--;
887
                                                        if(numOfNoises >= 3) break;
888
                                                }
889
                                        }
890
                                }
891
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
892
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
893
                                {
894
                                        int o;
895

    
896
                                        for(o=0; options[o]!=NULL && o<2; o++)
897
                                        {
898
                                                char *tail;
899
                                                int val= strtol(options[o], &tail, 0);
900
                                                if(tail==options[o]) break;
901

    
902
                                                numOfUnknownOptions--;
903
                                                if(o==0) ppMode->baseDcDiff= val;
904
                                                else ppMode->flatnessThreshold= val;
905
                                        }
906
                                }
907
                                else if(filters[i].mask == FORCE_QUANT)
908
                                {
909
                                        int o;
910
                                        ppMode->forcedQuant= 15;
911

    
912
                                        for(o=0; options[o]!=NULL && o<1; o++)
913
                                        {
914
                                                char *tail;
915
                                                int val= strtol(options[o], &tail, 0);
916
                                                if(tail==options[o]) break;
917

    
918
                                                numOfUnknownOptions--;
919
                                                ppMode->forcedQuant= val;
920
                                        }
921
                                }
922
                        }
923
                }
924
                if(!filterNameOk) ppMode->error++;
925
                ppMode->error += numOfUnknownOptions;
926
        }
927

    
928
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
929
        if(ppMode->error)
930
        {
931
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
932
                av_free(ppMode);
933
                return NULL;
934
        }
935
        return ppMode;
936
}
937

    
938
/**
 * Release a mode with av_free().
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}
941

    
942
/**
 * Replace the buffer *p with a fresh zero-filled one of size bytes.
 * The old contents are discarded. The alignment argument is not used here;
 * the new buffer comes straight from av_mallocz().
 */
static void reallocAlign(void **p, int alignment, int size)
{
        void *previous = *p;

        av_free(previous);
        *p = av_mallocz(size);
}
946

    
947
/**
 * (Re)allocate every work buffer of the context for the given picture size
 * and strides, and record stride / qpStride in the context.
 * All buffers are replaced by zero-filled ones; yHistogram is then seeded
 * with a uniform value derived from the picture area.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
        const int mbWidth  = (width  + 15) >> 4;
        const int mbHeight = (height + 15) >> 4;
        const int histSeed = width*height/64*15/256;
        int plane;
        int bin;

        c->stride   = stride;
        c->qpStride = qpStride;

        reallocAlign((void **)&c->tempDst,    8, stride*24);
        reallocAlign((void **)&c->tempSrc,    8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        for (bin = 0; bin < 256; bin++) {
                c->yHistogram[bin] = histSeed;
        }

        for (plane = 0; plane < 3; plane++) {
                //Note: The +17*1024 is just there so that reads/writes past the end need no special care.
                reallocAlign((void **)&c->tempBlured[plane],     8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[plane], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp,     8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable,   8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable,    8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
974

    
975
static void global_init(void){
976
        int i;
977
        memset(clip_table, 0, 256);
978
        for(i=256; i<512; i++)
979
                clip_table[i]= i;
980
        memset(clip_table+512, 0, 256);
981
}
982

    
983
/**
 * Name callback stored in av_codec_context_class: returns the constant
 * string "postproc" regardless of the context passed in.
 */
static const char * context_to_name(void * ptr)
{
    (void)ptr; /* the name does not depend on the context */
    return "postproc";
}
986

    
987
/* Class descriptor that pp_get_context() stores in the context's av_class
   field; contexts are later passed to av_log() as its first argument.
   Initialized positionally with the label "Postproc", the context_to_name
   callback and NULL. */
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
988

    
989
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
990
        PPContext *c= av_malloc(sizeof(PPContext));
991
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
992
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
993

    
994
        global_init();
995

    
996
        memset(c, 0, sizeof(PPContext));
997
        c->av_class = &av_codec_context_class;
998
        c->cpuCaps= cpuCaps;
999
        if(cpuCaps&PP_FORMAT){
1000
                c->hChromaSubSample= cpuCaps&0x3;
1001
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1002
        }else{
1003
                c->hChromaSubSample= 1;
1004
                c->vChromaSubSample= 1;
1005
        }
1006

    
1007
        reallocBuffers(c, width, height, stride, qpStride);
1008

    
1009
        c->frameNum=-1;
1010

    
1011
        return c;
1012
}
1013

    
1014
void pp_free_context(void *vc){
1015
        PPContext *c = (PPContext*)vc;
1016
        int i;
1017

    
1018
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1019
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1020

    
1021
        av_free(c->tempBlocks);
1022
        av_free(c->yHistogram);
1023
        av_free(c->tempDst);
1024
        av_free(c->tempSrc);
1025
        av_free(c->deintTemp);
1026
        av_free(c->stdQPTable);
1027
        av_free(c->nonBQPTable);
1028
        av_free(c->forcedQPTable);
1029

    
1030
        memset(c, 0, sizeof(PPContext));
1031

    
1032
        av_free(c);
1033
}
1034

    
1035
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1036
                 uint8_t * dst[3], int dstStride[3],
1037
                 int width, int height,
1038
                 QP_STORE_T *QP_store,  int QPStride,
1039
                 pp_mode_t *vm,  void *vc, int pict_type)
1040
{
1041
        int mbWidth = (width+15)>>4;
1042
        int mbHeight= (height+15)>>4;
1043
        PPMode *mode = (PPMode*)vm;
1044
        PPContext *c = (PPContext*)vc;
1045
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1046
        int absQPStride = FFABS(QPStride);
1047

    
1048
        // c->stride and c->QPStride are always positive
1049
        if(c->stride < minStride || c->qpStride < absQPStride)
1050
                reallocBuffers(c, width, height,
1051
                                FFMAX(minStride, c->stride),
1052
                                FFMAX(c->qpStride, absQPStride));
1053

    
1054
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1055
        {
1056
                int i;
1057
                QP_store= c->forcedQPTable;
1058
                absQPStride = QPStride = 0;
1059
                if(mode->lumMode & FORCE_QUANT)
1060
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1061
                else
1062
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1063
        }
1064

    
1065
        if(pict_type & PP_PICT_TYPE_QP2){
1066
                int i;
1067
                const int count= mbHeight * absQPStride;
1068
                for(i=0; i<(count>>2); i++){
1069
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1070
                }
1071
                for(i<<=2; i<count; i++){
1072
                        c->stdQPTable[i] = QP_store[i]>>1;
1073
                }
1074
                QP_store= c->stdQPTable;
1075
                QPStride= absQPStride;
1076
        }
1077

    
1078
if(0){
1079
int x,y;
1080
for(y=0; y<mbHeight; y++){
1081
        for(x=0; x<mbWidth; x++){
1082
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1083
        }
1084
        av_log(c, AV_LOG_INFO, "\n");
1085
}
1086
        av_log(c, AV_LOG_INFO, "\n");
1087
}
1088

    
1089
        if((pict_type&7)!=3)
1090
        {
1091
                if (QPStride >= 0) {
1092
                        int i;
1093
                        const int count= mbHeight * QPStride;
1094
                        for(i=0; i<(count>>2); i++){
1095
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1096
                        }
1097
                        for(i<<=2; i<count; i++){
1098
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1099
                        }
1100
                } else {
1101
                        int i,j;
1102
                        for(i=0; i<mbHeight; i++) {
1103
                                    for(j=0; j<absQPStride; j++) {
1104
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1105
                                }
1106
                        }
1107
                }
1108
        }
1109

    
1110
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1111
               mode->lumMode, mode->chromMode);
1112

    
1113
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1114
                width, height, QP_store, QPStride, 0, mode, c);
1115

    
1116
        width  = (width )>>c->hChromaSubSample;
1117
        height = (height)>>c->vChromaSubSample;
1118

    
1119
        if(mode->chromMode)
1120
        {
1121
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1122
                        width, height, QP_store, QPStride, 1, mode, c);
1123
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1124
                        width, height, QP_store, QPStride, 2, mode, c);
1125
        }
1126
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1127
        {
1128
                linecpy(dst[1], src[1], height, srcStride[1]);
1129
                linecpy(dst[2], src[2], height, srcStride[2]);
1130
        }
1131
        else
1132
        {
1133
                int y;
1134
                for(y=0; y<height; y++)
1135
                {
1136
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1137
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1138
                }
1139
        }
1140
}
1141