Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ 2cab6401

History | View | Annotate | Download (43.6 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less self-invented filters, so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding, ...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use the Subversion log
75

    
76
#include "config.h"
77
#include "avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85
//#undef HAVE_MMX2
86
//#define HAVE_3DNOW
87
//#undef HAVE_MMX
88
//#undef ARCH_X86
89
//#define DEBUG_BRIGHTNESS
90
#include "postprocess.h"
91
#include "postprocess_internal.h"
92

    
93
#ifdef HAVE_ALTIVEC_H
94
#include <altivec.h>
95
#endif
96

    
97
#define GET_MODE_BUFFER_SIZE 500
98
#define OPTIONS_ARRAY_SIZE 10
99
#define BLOCK_SIZE 8
100
#define TEMP_STRIDE 8
101
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
102

    
103
#if defined(ARCH_X86)
104
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
105
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
106
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
107
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
108
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
109
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
110
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
111
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
112
#endif
113

    
114
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
115

    
116

    
117
static struct PPFilter filters[]=
118
{
119
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
120
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
121
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
122
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
123
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
124
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
125
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
126
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
127
        {"dr", "dering",                1, 5, 6, DERING},
128
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
129
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
130
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
131
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
132
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
133
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
134
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
135
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
136
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
137
        {NULL, NULL,0,0,0,0} //End Marker
138
};
139

    
140
/*
 * Alias table stored as consecutive (name, expansion) string pairs:
 * even indices hold the alias, the following odd index holds the filter
 * string it stands for. A single NULL terminates the list.
 */
static const char *replaceTable[]=
{
    /* alias */    /* expansion */
    "default",     "hb:a,vb:a,dr:a",
    "de",          "hb:a,vb:a,dr:a",
    "fast",        "h1:a,v1:a,dr:a",
    "fa",          "h1:a,v1:a,dr:a",
    "ac",          "ha:a:128:7,va:a,dr:a",
    NULL // end marker
};
149

    
150

    
151
#if defined(ARCH_X86)
152
/**
 * Emit an x86 PREFETCHNTA instruction for the address p.
 */
static inline void prefetchnta(void *p)
{
    __asm__ volatile ("prefetchnta (%0)\n\t" : : "r" (p));
}
158

    
159
/**
 * Emit an x86 PREFETCHT0 instruction for the address p.
 */
static inline void prefetcht0(void *p)
{
    __asm__ volatile ("prefetcht0 (%0)\n\t" : : "r" (p));
}
165

    
166
/**
 * Emit an x86 PREFETCHT1 instruction for the address p.
 */
static inline void prefetcht1(void *p)
{
    __asm__ volatile ("prefetcht1 (%0)\n\t" : : "r" (p));
}
172

    
173
/**
 * Emit an x86 PREFETCHT2 instruction for the address p.
 */
static inline void prefetcht2(void *p)
{
    __asm__ volatile ("prefetcht2 (%0)\n\t" : : "r" (p));
}
179
#endif
180

    
181
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
182

    
183
/**
184
 * Check if the given 8x8 Block is mostly "flat"
185
 */
186
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
187
{
188
        int numEq= 0;
189
        int y;
190
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
191
        const int dcThreshold= dcOffset*2 + 1;
192

    
193
        for(y=0; y<BLOCK_SIZE; y++)
194
        {
195
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
196
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
197
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
198
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
199
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
200
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
201
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
202
                src+= stride;
203
        }
204
        return numEq > c->ppMode.flatnessThreshold;
205
}
206

    
207
/**
208
 * Check if the middle 8x8 Block in the given 8x16 block is flat
209
 */
210
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
211
        int numEq= 0;
212
        int y;
213
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
214
        const int dcThreshold= dcOffset*2 + 1;
215

    
216
        src+= stride*4; // src points to begin of the 8x8 Block
217
        for(y=0; y<BLOCK_SIZE-1; y++)
218
        {
219
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
227
                src+= stride;
228
        }
229
        return numEq > c->ppMode.flatnessThreshold;
230
}
231

    
232
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
233
{
234
        int i;
235
#if 1
236
        for(i=0; i<2; i++){
237
                if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
238
                src += stride;
239
                if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
240
                src += stride;
241
                if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
242
                src += stride;
243
                if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
244
                src += stride;
245
        }
246
#else
247
        for(i=0; i<8; i++){
248
                if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
249
                src += stride;
250
        }
251
#endif
252
        return 1;
253
}
254

    
255
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
256
{
257
#if 1
258
#if 1
259
        int x;
260
        src+= stride*4;
261
        for(x=0; x<BLOCK_SIZE; x+=4)
262
        {
263
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
264
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
265
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
266
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
267
        }
268
#else
269
        int x;
270
        src+= stride*3;
271
        for(x=0; x<BLOCK_SIZE; x++)
272
        {
273
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
274
        }
275
#endif
276
        return 1;
277
#else
278
        int x;
279
        src+= stride*4;
280
        for(x=0; x<BLOCK_SIZE; x++)
281
        {
282
                int min=255;
283
                int max=0;
284
                int y;
285
                for(y=0; y<8; y++){
286
                        int v= src[x + y*stride];
287
                        if(v>max) max=v;
288
                        if(v<min) min=v;
289
                }
290
                if(max-min > 2*QP) return 0;
291
        }
292
        return 1;
293
#endif
294
}
295

    
296
/**
 * Classify an 8x8 block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it,
 *         0 if it is flat but isHorizMinMaxOk_C() rejects it
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
306

    
307
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it,
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
317

    
318
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows, the step between dst[3] and dst[4] is
 * reduced when the "middle energy" is below 8*QP. The correction d is derived
 * from how much the middle energy exceeds the smaller of the left and right
 * energies, and is clamped so that it has the sign of q = (dst[3]-dst[4])/2
 * and does not exceed q in magnitude. Only dst[3] and dst[4] are written.
 *
 * @param dst    first pixel of the 8 pixel wide row segment to filter
 * @param stride distance between two rows
 * @param c      context; only QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for(row=0; row<BLOCK_SIZE; row++, dst+= stride)
    {
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int leftEnergy, rightEnergy, q, d;

        if(FFABS(middleEnergy) >= 8*c->QP)
            continue; // too strong to be treated as a blocking artifact

        q          = (dst[3] - dst[4])/2;
        leftEnergy =  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy=  5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
        d= FFMAX(d, 0);
        d= (5*d + 32) >> 6;
        d*= FFSIGN(-middleEnergy);

        // clamp d to the range between 0 and q
        if(q>0)
            d= FFMIN(FFMAX(d, 0), q);
        else
            d= FFMAX(FFMIN(d, 0), q);

        dst[3]-= d;
        dst[4]+= d;
    }
}
354

    
355
/**
356
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
357
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
358
 */
359
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
360
{
361
        int y;
362
        for(y=0; y<BLOCK_SIZE; y++)
363
        {
364
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
365
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
366

    
367
                int sums[10];
368
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
369
                sums[1] = sums[0] - first  + dst[3];
370
                sums[2] = sums[1] - first  + dst[4];
371
                sums[3] = sums[2] - first  + dst[5];
372
                sums[4] = sums[3] - first  + dst[6];
373
                sums[5] = sums[4] - dst[0] + dst[7];
374
                sums[6] = sums[5] - dst[1] + last;
375
                sums[7] = sums[6] - dst[2] + last;
376
                sums[8] = sums[7] - dst[3] + last;
377
                sums[9] = sums[8] - dst[4] + last;
378

    
379
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
380
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
381
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
382
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
383
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
384
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
385
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
386
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
387

    
388
                dst+= stride;
389
        }
390
}
391

    
392
/**
393
 * Experimental Filter 1 (Horizontal)
394
 * will not damage linear gradients
395
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
396
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
397
 * MMX2 version does correct clipping C version does not
398
 * not identical with the vertical one
399
 */
400
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
401
{
402
        int y;
403
        static uint64_t *lut= NULL;
404
        if(lut==NULL)
405
        {
406
                int i;
407
                lut = av_malloc(256*8);
408
                for(i=0; i<256; i++)
409
                {
410
                        int v= i < 128 ? 2*i : 2*(i-256);
411
/*
412
//Simulate 112242211 9-Tap filter
413
                        uint64_t a= (v/16) & 0xFF;
414
                        uint64_t b= (v/8) & 0xFF;
415
                        uint64_t c= (v/4) & 0xFF;
416
                        uint64_t d= (3*v/8) & 0xFF;
417
*/
418
//Simulate piecewise linear interpolation
419
                        uint64_t a= (v/16) & 0xFF;
420
                        uint64_t b= (v*3/16) & 0xFF;
421
                        uint64_t c= (v*5/16) & 0xFF;
422
                        uint64_t d= (7*v/16) & 0xFF;
423
                        uint64_t A= (0x100 - a)&0xFF;
424
                        uint64_t B= (0x100 - b)&0xFF;
425
                        uint64_t C= (0x100 - c)&0xFF;
426
                        uint64_t D= (0x100 - c)&0xFF;
427

    
428
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
429
                                (D<<24) | (C<<16) | (B<<8) | (A);
430
                        //lut[i] = (v<<32) | (v<<24);
431
                }
432
        }
433

    
434
        for(y=0; y<BLOCK_SIZE; y++)
435
        {
436
                int a= src[1] - src[2];
437
                int b= src[3] - src[4];
438
                int c= src[5] - src[6];
439

    
440
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
441

    
442
                if(d < QP)
443
                {
444
                        int v = d * FFSIGN(-b);
445

    
446
                        src[1] +=v/8;
447
                        src[2] +=v/4;
448
                        src[3] +=3*v/8;
449
                        src[4] -=3*v/8;
450
                        src[5] -=v/4;
451
                        src[6] -=v/8;
452

    
453
                }
454
                src+=stride;
455
        }
456
}
457

    
458
/**
459
 * accurate deblock filter
460
 */
461
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
462
        int y;
463
        const int QP= c->QP;
464
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
465
        const int dcThreshold= dcOffset*2 + 1;
466
//START_TIMER
467
        src+= step*4; // src points to begin of the 8x8 Block
468
        for(y=0; y<8; y++){
469
                int numEq= 0;
470

    
471
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
472
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
473
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
474
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
475
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
476
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
477
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
478
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
479
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
480
                if(numEq > c->ppMode.flatnessThreshold){
481
                        int min, max, x;
482

    
483
                        if(src[0] > src[step]){
484
                            max= src[0];
485
                            min= src[step];
486
                        }else{
487
                            max= src[step];
488
                            min= src[0];
489
                        }
490
                        for(x=2; x<8; x+=2){
491
                                if(src[x*step] > src[(x+1)*step]){
492
                                        if(src[x    *step] > max) max= src[ x   *step];
493
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
494
                                }else{
495
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
496
                                        if(src[ x   *step] < min) min= src[ x   *step];
497
                                }
498
                        }
499
                        if(max-min < 2*QP){
500
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
501
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
502

    
503
                                int sums[10];
504
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
505
                                sums[1] = sums[0] - first       + src[3*step];
506
                                sums[2] = sums[1] - first       + src[4*step];
507
                                sums[3] = sums[2] - first       + src[5*step];
508
                                sums[4] = sums[3] - first       + src[6*step];
509
                                sums[5] = sums[4] - src[0*step] + src[7*step];
510
                                sums[6] = sums[5] - src[1*step] + last;
511
                                sums[7] = sums[6] - src[2*step] + last;
512
                                sums[8] = sums[7] - src[3*step] + last;
513
                                sums[9] = sums[8] - src[4*step] + last;
514

    
515
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
516
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
517
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
518
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
519
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
520
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
521
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
522
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
523
                        }
524
                }else{
525
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
526

    
527
                        if(FFABS(middleEnergy) < 8*QP)
528
                        {
529
                                const int q=(src[3*step] - src[4*step])/2;
530
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
531
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
532

    
533
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
534
                                d= FFMAX(d, 0);
535

    
536
                                d= (5*d + 32) >> 6;
537
                                d*= FFSIGN(-middleEnergy);
538

    
539
                                if(q>0)
540
                                {
541
                                        d= d<0 ? 0 : d;
542
                                        d= d>q ? q : d;
543
                                }
544
                                else
545
                                {
546
                                        d= d>0 ? 0 : d;
547
                                        d= d<q ? q : d;
548
                                }
549

    
550
                                src[3*step]-= d;
551
                                src[4*step]+= d;
552
                        }
553
                }
554

    
555
                src += stride;
556
        }
557
/*if(step==16){
558
    STOP_TIMER("step16")
559
}else{
560
    STOP_TIMER("stepX")
561
}*/
562
}
563

    
564
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
565
//Plain C versions
566
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
567
#define COMPILE_C
568
#endif
569

    
570
#ifdef ARCH_POWERPC
571
#ifdef HAVE_ALTIVEC
572
#define COMPILE_ALTIVEC
573
#endif //HAVE_ALTIVEC
574
#endif //ARCH_POWERPC
575

    
576
#if defined(ARCH_X86)
577

    
578
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
579
#define COMPILE_MMX
580
#endif
581

    
582
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
583
#define COMPILE_MMX2
584
#endif
585

    
586
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
587
#define COMPILE_3DNOW
588
#endif
589
#endif /* defined(ARCH_X86) */
590

    
591
#undef HAVE_MMX
592
#undef HAVE_MMX2
593
#undef HAVE_3DNOW
594
#undef HAVE_ALTIVEC
595

    
596
#ifdef COMPILE_C
597
#undef HAVE_MMX
598
#undef HAVE_MMX2
599
#undef HAVE_3DNOW
600
#define RENAME(a) a ## _C
601
#include "postprocess_template.c"
602
#endif
603

    
604
#ifdef ARCH_POWERPC
605
#ifdef COMPILE_ALTIVEC
606
#undef RENAME
607
#define HAVE_ALTIVEC
608
#define RENAME(a) a ## _altivec
609
#include "postprocess_altivec_template.c"
610
#include "postprocess_template.c"
611
#endif
612
#endif //ARCH_POWERPC
613

    
614
//MMX versions
615
#ifdef COMPILE_MMX
616
#undef RENAME
617
#define HAVE_MMX
618
#undef HAVE_MMX2
619
#undef HAVE_3DNOW
620
#define RENAME(a) a ## _MMX
621
#include "postprocess_template.c"
622
#endif
623

    
624
//MMX2 versions
625
#ifdef COMPILE_MMX2
626
#undef RENAME
627
#define HAVE_MMX
628
#define HAVE_MMX2
629
#undef HAVE_3DNOW
630
#define RENAME(a) a ## _MMX2
631
#include "postprocess_template.c"
632
#endif
633

    
634
//3DNOW versions
635
#ifdef COMPILE_3DNOW
636
#undef RENAME
637
#define HAVE_MMX
638
#undef HAVE_MMX2
639
#define HAVE_3DNOW
640
#define RENAME(a) a ## _3DNow
641
#include "postprocess_template.c"
642
#endif
643

    
644
// minor note: the HAVE_xyz is messed up after that line so do not use it.
645

    
646
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
647
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
648
{
649
        PPContext *c= (PPContext *)vc;
650
        PPMode *ppMode= (PPMode *)vm;
651
        c->ppMode= *ppMode; //FIXME
652

    
653
        // Using ifs here as they are faster than function pointers although the
654
        // difference would not be measureable here but it is much better because
655
        // someone might exchange the CPU whithout restarting MPlayer ;)
656
#ifdef RUNTIME_CPUDETECT
657
#if defined(ARCH_X86)
658
        // ordered per speed fasterst first
659
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
660
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
662
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
664
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
665
        else
666
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667
#else
668
#ifdef ARCH_POWERPC
669
#ifdef HAVE_ALTIVEC
670
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
671
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
672
        else
673
#endif
674
#endif
675
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676
#endif
677
#else //RUNTIME_CPUDETECT
678
#ifdef HAVE_MMX2
679
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680
#elif defined (HAVE_3DNOW)
681
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682
#elif defined (HAVE_MMX)
683
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684
#elif defined (HAVE_ALTIVEC)
685
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686
#else
687
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688
#endif
689
#endif //!RUNTIME_CPUDETECT
690
}
691

    
692
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
693
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
694

    
695
/*
 * Help text for the -pp command line option: the table of available filters
 * and their options, followed by the mode string syntax and some examples.
 * Older library versions export it as a pointer, newer ones as an array.
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
    "Available postprocessing filters:\n"
    "Filters                        Options\n"
    "short  long name       short   long option     Description\n"
    "*      *               a       autoq           CPU power dependent enabler\n"
    "                       c       chrom           chrominance filtering enabled\n"
    "                       y       nochrom         chrominance filtering disabled\n"
    "                       n       noluma          luma filtering disabled\n"
    "hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
    "       1. difference factor: default=32, higher -> more deblocking\n"
    "       2. flatness threshold: default=39, lower -> more deblocking\n"
    "                       the h & v deblocking filters share these\n"
    "                       so you can't set different thresholds for h / v\n"
    "vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
    "ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
    "va     vadeblock       (2 threshold)           vertical deblocking filter\n"
    "h1     x1hdeblock                              experimental h deblock filter 1\n"
    "v1     x1vdeblock                              experimental v deblock filter 1\n"
    "dr     dering                                  deringing filter\n"
    "al     autolevels                              automatic brightness / contrast\n"
    "                       f        fullyrange     stretch luminance to (0..255)\n"
    "lb     linblenddeint                           linear blend deinterlacer\n"
    "li     linipoldeint                            linear interpolating deinterlace\n"
    "ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
    "md     mediandeint                             median deinterlacer\n"
    "fd     ffmpegdeint                             ffmpeg deinterlacer\n"
    "l5     lowpass5                                FIR lowpass deinterlacer\n"
    "de     default                                 hb:a,vb:a,dr:a\n"
    "fa     fast                                    h1:a,v1:a,dr:a\n"
    "ac                                             ha:a:128:7,va:a,dr:a\n"
    "tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
    "                     1. <= 2. <= 3.            larger -> stronger filtering\n"
    "fq     forceQuant      <quantizer>             force quantizer\n"
    "Usage:\n"
    "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
    "long form example:\n"
    "vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
    "short form example:\n"
    "vb:a/hb:a/lb                                   de,-vb\n"
    "more examples:\n"
    "tn:64:128:256\n"
    "\n";
744

    
745
pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
746
{
747
        char temp[GET_MODE_BUFFER_SIZE];
748
        char *p= temp;
749
        static const char filterDelimiters[] = ",/";
750
        static const char optionDelimiters[] = ":";
751
        struct PPMode *ppMode;
752
        char *filterToken;
753

    
754
        ppMode= av_malloc(sizeof(PPMode));
755

    
756
        ppMode->lumMode= 0;
757
        ppMode->chromMode= 0;
758
        ppMode->maxTmpNoise[0]= 700;
759
        ppMode->maxTmpNoise[1]= 1500;
760
        ppMode->maxTmpNoise[2]= 3000;
761
        ppMode->maxAllowedY= 234;
762
        ppMode->minAllowedY= 16;
763
        ppMode->baseDcDiff= 256/8;
764
        ppMode->flatnessThreshold= 56-16-1;
765
        ppMode->maxClippedThreshold= 0.01;
766
        ppMode->error=0;
767

    
768
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
769

    
770
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
771

    
772
        for(;;){
773
                char *filterName;
774
                int q= 1000000; //PP_QUALITY_MAX;
775
                int chrom=-1;
776
                int luma=-1;
777
                char *option;
778
                char *options[OPTIONS_ARRAY_SIZE];
779
                int i;
780
                int filterNameOk=0;
781
                int numOfUnknownOptions=0;
782
                int enable=1; //does the user want us to enabled or disabled the filter
783

    
784
                filterToken= strtok(p, filterDelimiters);
785
                if(filterToken == NULL) break;
786
                p+= strlen(filterToken) + 1; // p points to next filterToken
787
                filterName= strtok(filterToken, optionDelimiters);
788
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
789

    
790
                if(*filterName == '-')
791
                {
792
                        enable=0;
793
                        filterName++;
794
                }
795

    
796
                for(;;){ //for all options
797
                        option= strtok(NULL, optionDelimiters);
798
                        if(option == NULL) break;
799

    
800
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
801
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
802
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
803
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
804
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
805
                        else
806
                        {
807
                                options[numOfUnknownOptions] = option;
808
                                numOfUnknownOptions++;
809
                        }
810
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
811
                }
812
                options[numOfUnknownOptions] = NULL;
813

    
814
                /* replace stuff from the replace Table */
815
                for(i=0; replaceTable[2*i]!=NULL; i++)
816
                {
817
                        if(!strcmp(replaceTable[2*i], filterName))
818
                        {
819
                                int newlen= strlen(replaceTable[2*i + 1]);
820
                                int plen;
821
                                int spaceLeft;
822

    
823
                                if(p==NULL) p= temp, *p=0;      //last filter
824
                                else p--, *p=',';               //not last filter
825

    
826
                                plen= strlen(p);
827
                                spaceLeft= p - temp + plen;
828
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
829
                                {
830
                                        ppMode->error++;
831
                                        break;
832
                                }
833
                                memmove(p + newlen, p, plen+1);
834
                                memcpy(p, replaceTable[2*i + 1], newlen);
835
                                filterNameOk=1;
836
                        }
837
                }
838

    
839
                for(i=0; filters[i].shortName!=NULL; i++)
840
                {
841
                        if(   !strcmp(filters[i].longName, filterName)
842
                           || !strcmp(filters[i].shortName, filterName))
843
                        {
844
                                ppMode->lumMode &= ~filters[i].mask;
845
                                ppMode->chromMode &= ~filters[i].mask;
846

    
847
                                filterNameOk=1;
848
                                if(!enable) break; // user wants to disable it
849

    
850
                                if(q >= filters[i].minLumQuality && luma)
851
                                        ppMode->lumMode|= filters[i].mask;
852
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
853
                                        if(q >= filters[i].minChromQuality)
854
                                                ppMode->chromMode|= filters[i].mask;
855

    
856
                                if(filters[i].mask == LEVEL_FIX)
857
                                {
858
                                        int o;
859
                                        ppMode->minAllowedY= 16;
860
                                        ppMode->maxAllowedY= 234;
861
                                        for(o=0; options[o]!=NULL; o++)
862
                                        {
863
                                                if(  !strcmp(options[o],"fullyrange")
864
                                                   ||!strcmp(options[o],"f"))
865
                                                {
866
                                                        ppMode->minAllowedY= 0;
867
                                                        ppMode->maxAllowedY= 255;
868
                                                        numOfUnknownOptions--;
869
                                                }
870
                                        }
871
                                }
872
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
873
                                {
874
                                        int o;
875
                                        int numOfNoises=0;
876

    
877
                                        for(o=0; options[o]!=NULL; o++)
878
                                        {
879
                                                char *tail;
880
                                                ppMode->maxTmpNoise[numOfNoises]=
881
                                                        strtol(options[o], &tail, 0);
882
                                                if(tail!=options[o])
883
                                                {
884
                                                        numOfNoises++;
885
                                                        numOfUnknownOptions--;
886
                                                        if(numOfNoises >= 3) break;
887
                                                }
888
                                        }
889
                                }
890
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
891
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
892
                                {
893
                                        int o;
894

    
895
                                        for(o=0; options[o]!=NULL && o<2; o++)
896
                                        {
897
                                                char *tail;
898
                                                int val= strtol(options[o], &tail, 0);
899
                                                if(tail==options[o]) break;
900

    
901
                                                numOfUnknownOptions--;
902
                                                if(o==0) ppMode->baseDcDiff= val;
903
                                                else ppMode->flatnessThreshold= val;
904
                                        }
905
                                }
906
                                else if(filters[i].mask == FORCE_QUANT)
907
                                {
908
                                        int o;
909
                                        ppMode->forcedQuant= 15;
910

    
911
                                        for(o=0; options[o]!=NULL && o<1; o++)
912
                                        {
913
                                                char *tail;
914
                                                int val= strtol(options[o], &tail, 0);
915
                                                if(tail==options[o]) break;
916

    
917
                                                numOfUnknownOptions--;
918
                                                ppMode->forcedQuant= val;
919
                                        }
920
                                }
921
                        }
922
                }
923
                if(!filterNameOk) ppMode->error++;
924
                ppMode->error += numOfUnknownOptions;
925
        }
926

    
927
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
928
        if(ppMode->error)
929
        {
930
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
931
                av_free(ppMode);
932
                return NULL;
933
        }
934
        return ppMode;
935
}
936

    
937
/**
 * Release a mode object, e.g. one returned by
 * pp_get_mode_by_name_and_quality().
 *
 * @param mode mode to release; it is handed straight to av_free()
 */
void pp_free_mode(pp_mode_t *mode)
{
        av_free(mode);
}
940

    
941
/**
 * Replace the buffer stored in *p by a new one obtained from av_mallocz().
 * The old buffer is freed first, its contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit, not used here
 * @param size      size passed to av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
        void *fresh;

        av_free(*p);
        fresh= av_mallocz(size);
        *p= fresh;
}
945

    
946
/**
 * (Re)allocate all working buffers of a context for the given picture
 * geometry and record the strides they were sized for.
 * Every buffer is replaced through reallocAlign(), so previous contents are
 * dropped.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width
 * @param height   picture height
 * @param stride   line stride the temporary picture buffers are sized for
 * @param qpStride stride the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
        const int mbCols= (width+15)>>4;
        const int mbRows= (height+15)>>4;
        const int histogramFill= width*height/64*15/256;
        int bin, plane;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

        // start with the same count in every histogram bin
        bin= 0;
        while(bin < 256)
        {
                c->yHistogram[bin]= histogramFill;
                bin++;
        }

        for(plane=0; plane<3; plane++)
        {
                // The extra 17*1024 bytes are slack so that reads / writes
                // past the end do not have to be worried about.
                reallocAlign((void **)&c->tempBlured[plane], 8, stride*mbRows*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[plane], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbRows*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbRows*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbCols*sizeof(QP_STORE_T));
}
973

    
974
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return always the string "postproc"
 */
static const char *context_to_name(void *ptr)
{
        static const char postprocName[] = "postproc";

        (void)ptr; // the name does not depend on the object
        return postprocName;
}
977

    
978
/* Class descriptor of PPContext; pp_get_context() stores its address in
   c->av_class. Presumably this is what av_log(c, ...) uses to label messages
   coming from a postprocessing context -- confirm against AVClass. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
979

    
980
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
981
        PPContext *c= av_malloc(sizeof(PPContext));
982
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
983
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
984

    
985
        memset(c, 0, sizeof(PPContext));
986
        c->av_class = &av_codec_context_class;
987
        c->cpuCaps= cpuCaps;
988
        if(cpuCaps&PP_FORMAT){
989
                c->hChromaSubSample= cpuCaps&0x3;
990
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
991
        }else{
992
                c->hChromaSubSample= 1;
993
                c->vChromaSubSample= 1;
994
        }
995

    
996
        reallocBuffers(c, width, height, stride, qpStride);
997

    
998
        c->frameNum=-1;
999

    
1000
        return c;
1001
}
1002

    
1003
void pp_free_context(void *vc){
1004
        PPContext *c = (PPContext*)vc;
1005
        int i;
1006

    
1007
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1008
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1009

    
1010
        av_free(c->tempBlocks);
1011
        av_free(c->yHistogram);
1012
        av_free(c->tempDst);
1013
        av_free(c->tempSrc);
1014
        av_free(c->deintTemp);
1015
        av_free(c->stdQPTable);
1016
        av_free(c->nonBQPTable);
1017
        av_free(c->forcedQPTable);
1018

    
1019
        memset(c, 0, sizeof(PPContext));
1020

    
1021
        av_free(c);
1022
}
1023

    
1024
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1025
                 uint8_t * dst[3], const int dstStride[3],
1026
                 int width, int height,
1027
                 const QP_STORE_T *QP_store,  int QPStride,
1028
                 pp_mode_t *vm,  void *vc, int pict_type)
1029
{
1030
        int mbWidth = (width+15)>>4;
1031
        int mbHeight= (height+15)>>4;
1032
        PPMode *mode = (PPMode*)vm;
1033
        PPContext *c = (PPContext*)vc;
1034
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1035
        int absQPStride = FFABS(QPStride);
1036

    
1037
        // c->stride and c->QPStride are always positive
1038
        if(c->stride < minStride || c->qpStride < absQPStride)
1039
                reallocBuffers(c, width, height,
1040
                                FFMAX(minStride, c->stride),
1041
                                FFMAX(c->qpStride, absQPStride));
1042

    
1043
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1044
        {
1045
                int i;
1046
                QP_store= c->forcedQPTable;
1047
                absQPStride = QPStride = 0;
1048
                if(mode->lumMode & FORCE_QUANT)
1049
                        for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1050
                else
1051
                        for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1052
        }
1053

    
1054
        if(pict_type & PP_PICT_TYPE_QP2){
1055
                int i;
1056
                const int count= mbHeight * absQPStride;
1057
                for(i=0; i<(count>>2); i++){
1058
                        ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1059
                }
1060
                for(i<<=2; i<count; i++){
1061
                        c->stdQPTable[i] = QP_store[i]>>1;
1062
                }
1063
                QP_store= c->stdQPTable;
1064
                QPStride= absQPStride;
1065
        }
1066

    
1067
if(0){
1068
int x,y;
1069
for(y=0; y<mbHeight; y++){
1070
        for(x=0; x<mbWidth; x++){
1071
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1072
        }
1073
        av_log(c, AV_LOG_INFO, "\n");
1074
}
1075
        av_log(c, AV_LOG_INFO, "\n");
1076
}
1077

    
1078
        if((pict_type&7)!=3)
1079
        {
1080
                if (QPStride >= 0) {
1081
                        int i;
1082
                        const int count= mbHeight * QPStride;
1083
                        for(i=0; i<(count>>2); i++){
1084
                                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1085
                        }
1086
                        for(i<<=2; i<count; i++){
1087
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1088
                        }
1089
                } else {
1090
                        int i,j;
1091
                        for(i=0; i<mbHeight; i++) {
1092
                                    for(j=0; j<absQPStride; j++) {
1093
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1094
                                }
1095
                        }
1096
                }
1097
        }
1098

    
1099
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1100
               mode->lumMode, mode->chromMode);
1101

    
1102
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1103
                width, height, QP_store, QPStride, 0, mode, c);
1104

    
1105
        width  = (width )>>c->hChromaSubSample;
1106
        height = (height)>>c->vChromaSubSample;
1107

    
1108
        if(mode->chromMode)
1109
        {
1110
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1111
                        width, height, QP_store, QPStride, 1, mode, c);
1112
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1113
                        width, height, QP_store, QPStride, 2, mode, c);
1114
        }
1115
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1116
        {
1117
                linecpy(dst[1], src[1], height, srcStride[1]);
1118
                linecpy(dst[2], src[2], height, srcStride[2]);
1119
        }
1120
        else
1121
        {
1122
                int y;
1123
                for(y=0; y<height; y++)
1124
                {
1125
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1126
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1127
                }
1128
        }
1129
}
1130