Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ 0bda7817

History | View | Annotate | Download (44 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I don't have a 3DNow! CPU -> it's untested, but no one said it doesn't work, so it seems to work
51
# more or less self-invented filters, so the exactness isn't too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding, ...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use the Subversion log
75

    
76
#include "config.h"
77
#include "avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
#ifdef HAVE_MALLOC_H
83
#include <malloc.h>
84
#endif
85
//#undef HAVE_MMX2
86
//#define HAVE_3DNOW
87
//#undef HAVE_MMX
88
//#undef ARCH_X86
89
//#define DEBUG_BRIGHTNESS
90
#ifdef USE_FASTMEMCPY
91
#include "libvo/fastmemcpy.h"
92
#endif
93
#include "postprocess.h"
94
#include "postprocess_internal.h"
95

    
96
#include "mangle.h" //FIXME should be supressed
97

    
98
#ifdef HAVE_ALTIVEC_H
99
#include <altivec.h>
100
#endif
101

    
102
/* Compile-time sizes.  BLOCK_SIZE is the edge length (in pixels) of the square
 * blocks the C filters below iterate over.  The other three are not referenced
 * in this part of the file; presumably they size buffers/arrays used further
 * down or in the included templates -- TODO confirm. */
#define GET_MODE_BUFFER_SIZE 500
103
#define OPTIONS_ARRAY_SIZE 10
104
#define BLOCK_SIZE 8
105
#define TEMP_STRIDE 8
106
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
107

    
108
#if defined(ARCH_X86)
109
/* 8-byte aligned 64-bit constants holding one value replicated across the
 * whole quadword: the wNN constants repeat a 16-bit word four times, the bNN
 * constants repeat a byte eight times.  They are compiled on x86 only and are
 * not referenced in this part of the file; presumably the inline-asm templates
 * included further down load them as packed operands -- TODO confirm. */
static DECLARE_ALIGNED(8, uint64_t attribute_used, w05)= 0x0005000500050005LL;
110
static DECLARE_ALIGNED(8, uint64_t attribute_used, w04)= 0x0004000400040004LL;
111
static DECLARE_ALIGNED(8, uint64_t attribute_used, w20)= 0x0020002000200020LL;
112
static DECLARE_ALIGNED(8, uint64_t attribute_used, b00)= 0x0000000000000000LL;
113
static DECLARE_ALIGNED(8, uint64_t attribute_used, b01)= 0x0101010101010101LL;
114
static DECLARE_ALIGNED(8, uint64_t attribute_used, b02)= 0x0202020202020202LL;
115
static DECLARE_ALIGNED(8, uint64_t attribute_used, b08)= 0x0808080808080808LL;
116
static DECLARE_ALIGNED(8, uint64_t attribute_used, b80)= 0x8080808080808080LL;
117
#endif
118

    
119
/* clip_tab points 256 entries into the 768-entry clip_table, so indices in the
 * range [-256, 511] stay inside the array.  The table is not filled in here;
 * presumably an init routine elsewhere sets it up as a clamp-to-[0,255] lookup
 * -- TODO confirm. */
static uint8_t clip_table[3*256];
120
static uint8_t * const clip_tab= clip_table + 256;
121

    
122
/* Threshold constant for the dering filter; not referenced in this part of the
 * file (presumably read by the included templates -- TODO confirm). */
static const int attribute_used deringThreshold= 20;
123

    
124

    
125
/* Table of the known postprocessing filters, terminated by an all-NULL/0 entry.
 * Each row is: short name, long name, three integers and the filter's mask
 * constant.  struct PPFilter is declared outside this file, so the meaning of
 * the three integers is not visible here; presumably they are the chroma
 * default and the minimum luma/chroma quality levels -- TODO confirm against
 * the struct declaration.
 * NOTE(review): the long names "ahdeblock"/"avdeblock" below differ from the
 * "hadeblock"/"vadeblock" spellings printed by pp_help -- confirm which one is
 * intended. */
static struct PPFilter filters[]=
126
{
127
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
128
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
129
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
130
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
131
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
132
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
133
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
134
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
135
        {"dr", "dering",                1, 5, 6, DERING},
136
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
137
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
138
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
139
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
140
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
141
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
142
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
143
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
144
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
145
        {NULL, NULL,0,0,0,0} //End Marker
146
};
147

    
148
/**
 * Flat list of string pairs terminated by a single NULL: even indices hold a
 * shortcut name, the following odd index holds the filter string it stands for
 * (the consumer of this table is outside this part of the file).
 */
static const char *replaceTable[]=
{
        /* shortcut */  /* filter string */
        "default",      "hdeblock:a,vdeblock:a,dering:a",
        "de",           "hdeblock:a,vdeblock:a,dering:a",

        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",

        "ac",           "ha:a:128:7,va:a,dering:a",

        NULL /* end marker */
};
157

    
158

    
159
#if defined(ARCH_X86)
160
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
        __asm__ __volatile__("prefetchnta (%0)\n\t" : /* no outputs */ : "r" (p));
}
166

    
167
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
        __asm__ __volatile__("prefetcht0 (%0)\n\t" : /* no outputs */ : "r" (p));
}
173

    
174
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
        __asm__ __volatile__("prefetcht1 (%0)\n\t" : /* no outputs */ : "r" (p));
}
180

    
181
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
        __asm__ __volatile__("prefetcht2 (%0)\n\t" : /* no outputs */ : "r" (p));
}
187
#endif
188

    
189
// The horizontal functions exist only in C because the MMX code is faster with vertical filters and transposing
190

    
191
/**
192
 * Check if the given 8x8 Block is mostly "flat"
193
 */
194
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
195
{
196
        int numEq= 0;
197
        int y;
198
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
199
        const int dcThreshold= dcOffset*2 + 1;
200

    
201
        for(y=0; y<BLOCK_SIZE; y++)
202
        {
203
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
204
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
205
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
206
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
207
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
208
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
209
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
210
                src+= stride;
211
        }
212
        return numEq > c->ppMode.flatnessThreshold;
213
}
214

    
215
/**
216
 * Check if the middle 8x8 Block in the given 8x16 block is flat
217
 */
218
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
219
        int numEq= 0;
220
        int y;
221
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
222
        const int dcThreshold= dcOffset*2 + 1;
223

    
224
        src+= stride*4; // src points to begin of the 8x8 Block
225
        for(y=0; y<BLOCK_SIZE-1; y++)
226
        {
227
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
228
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
229
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
230
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
231
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
232
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
233
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
234
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
235
                src+= stride;
236
        }
237
        return numEq > c->ppMode.flatnessThreshold;
238
}
239

    
240
/**
 * Spot-check the horizontal pixel spread of an 8-row block.
 *
 * One pixel pair is sampled per row, cycling through the column pairs
 * (0,5), (2,7), (4,1), (6,3) every four rows.  With a non-negative QP the
 * unsigned compare rejects a pair whose difference is outside [-2*QP, 2*QP].
 *
 * @param src    first pixel of the block
 * @param stride offset from one row to the next
 * @param QP     quantizer used to scale the allowed difference
 * @return 0 as soon as one sampled pair is out of range, 1 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
        static const uint8_t leftCol[4]=  {0, 2, 4, 6};
        static const uint8_t rightCol[4]= {5, 7, 1, 3};
        int row;

        for(row=0; row<8; row++)
        {
                const uint8_t *line= src + row*stride;
                const int k= row & 3;

                if((unsigned)(line[leftCol[k]] - line[rightCol[k]] + 2*QP) > 4*QP)
                        return 0;
        }
        return 1;
}
262

    
263
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
264
{
265
#if 1
266
#if 1
267
        int x;
268
        src+= stride*4;
269
        for(x=0; x<BLOCK_SIZE; x+=4)
270
        {
271
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
272
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
273
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
274
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
275
        }
276
#else
277
        int x;
278
        src+= stride*3;
279
        for(x=0; x<BLOCK_SIZE; x++)
280
        {
281
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
282
        }
283
#endif
284
        return 1;
285
#else
286
        int x;
287
        src+= stride*4;
288
        for(x=0; x<BLOCK_SIZE; x++)
289
        {
290
                int min=255;
291
                int max=0;
292
                int y;
293
                for(y=0; y<8; y++){
294
                        int v= src[x + y*stride];
295
                        if(v>max) max=v;
296
                        if(v<min) min=v;
297
                }
298
                if(max-min > 2*QP) return 0;
299
        }
300
        return 1;
301
#endif
302
}
303

    
304
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat; otherwise 1 if
 *         isHorizMinMaxOk_C() accepts it for c->QP and 0 if it does not
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isHorizDC_C(src, stride, c) )
                return 2;

        return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
314

    
315
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat; otherwise 1 if
 *         isVertMinMaxOk_C() accepts it for c->QP and 0 if it does not
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
        if( !isVertDC_C(src, stride, c) )
                return 2;

        return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
325

    
326
/**
 * Default horizontal deblocking step on BLOCK_SIZE rows.
 *
 * For every row an "energy" is computed across the boundary between pixels 3
 * and 4.  If its magnitude is below 8*c->QP, a correction is derived from how
 * much it exceeds the smaller of the energies to its left and right, limited
 * to at most half of the (dst[3] - dst[4]) step, and applied with opposite
 * sign to dst[3] and dst[4].  Other pixels are left untouched.
 *
 * @param dst    first pixel of the 8-pixel wide row group, modified in place
 * @param stride offset from one row to the next
 * @param c      context providing QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
        uint8_t *row= dst;
        int line;

        for(line=0; line<BLOCK_SIZE; line++, row+= stride)
        {
                const int middleEnergy= 5*(row[4] - row[3]) + 2*(row[2] - row[5]);
                int leftEnergy, rightEnergy, limit, delta;

                if(FFABS(middleEnergy) >= 8*c->QP)
                        continue; // edge too strong, leave this row alone

                limit=       (row[3] - row[4])/2;
                leftEnergy=  5*(row[2] - row[1]) + 2*(row[0] - row[3]);
                rightEnergy= 5*(row[6] - row[5]) + 2*(row[4] - row[7]);

                delta= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
                delta= FFMAX(delta, 0);
                delta= (5*delta + 32) >> 6;
                delta*= FFSIGN(-middleEnergy);

                // clamp the correction to the range between 0 and limit
                if(limit > 0)
                        delta= FFMIN(FFMAX(delta, 0), limit);
                else
                        delta= FFMAX(FFMIN(delta, 0), limit);

                row[3]-= delta;
                row[4]+= delta;
        }
}
362

    
363
/**
364
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
365
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
366
 */
367
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
368
{
369
        int y;
370
        for(y=0; y<BLOCK_SIZE; y++)
371
        {
372
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
373
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
374

    
375
                int sums[10];
376
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
377
                sums[1] = sums[0] - first  + dst[3];
378
                sums[2] = sums[1] - first  + dst[4];
379
                sums[3] = sums[2] - first  + dst[5];
380
                sums[4] = sums[3] - first  + dst[6];
381
                sums[5] = sums[4] - dst[0] + dst[7];
382
                sums[6] = sums[5] - dst[1] + last;
383
                sums[7] = sums[6] - dst[2] + last;
384
                sums[8] = sums[7] - dst[3] + last;
385
                sums[9] = sums[8] - dst[4] + last;
386

    
387
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
388
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
389
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
390
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
391
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
392
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
393
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
394
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
395

    
396
                dst+= stride;
397
        }
398
}
399

    
400
/**
401
 * Experimental Filter 1 (Horizontal)
402
 * will not damage linear gradients
403
 * Flat blocks should look like they where passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
404
 * can only smooth blocks at the expected locations (it cant smooth them if they did move)
405
 * MMX2 version does correct clipping C version doesnt
406
 * not identical with the vertical one
407
 */
408
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
409
{
410
        int y;
411
        static uint64_t *lut= NULL;
412
        if(lut==NULL)
413
        {
414
                int i;
415
                lut = av_malloc(256*8);
416
                for(i=0; i<256; i++)
417
                {
418
                        int v= i < 128 ? 2*i : 2*(i-256);
419
/*
420
//Simulate 112242211 9-Tap filter
421
                        uint64_t a= (v/16) & 0xFF;
422
                        uint64_t b= (v/8) & 0xFF;
423
                        uint64_t c= (v/4) & 0xFF;
424
                        uint64_t d= (3*v/8) & 0xFF;
425
*/
426
//Simulate piecewise linear interpolation
427
                        uint64_t a= (v/16) & 0xFF;
428
                        uint64_t b= (v*3/16) & 0xFF;
429
                        uint64_t c= (v*5/16) & 0xFF;
430
                        uint64_t d= (7*v/16) & 0xFF;
431
                        uint64_t A= (0x100 - a)&0xFF;
432
                        uint64_t B= (0x100 - b)&0xFF;
433
                        uint64_t C= (0x100 - c)&0xFF;
434
                        uint64_t D= (0x100 - c)&0xFF;
435

    
436
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
437
                                (D<<24) | (C<<16) | (B<<8) | (A);
438
                        //lut[i] = (v<<32) | (v<<24);
439
                }
440
        }
441

    
442
        for(y=0; y<BLOCK_SIZE; y++)
443
        {
444
                int a= src[1] - src[2];
445
                int b= src[3] - src[4];
446
                int c= src[5] - src[6];
447

    
448
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449

    
450
                if(d < QP)
451
                {
452
                        int v = d * FFSIGN(-b);
453

    
454
                        src[1] +=v/8;
455
                        src[2] +=v/4;
456
                        src[3] +=3*v/8;
457
                        src[4] -=3*v/8;
458
                        src[5] -=v/4;
459
                        src[6] -=v/8;
460

    
461
                }
462
                src+=stride;
463
        }
464
}
465

    
466
/**
467
 * accurate deblock filter
468
 */
469
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
470
        int y;
471
        const int QP= c->QP;
472
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
473
        const int dcThreshold= dcOffset*2 + 1;
474
//START_TIMER
475
        src+= step*4; // src points to begin of the 8x8 Block
476
        for(y=0; y<8; y++){
477
                int numEq= 0;
478

    
479
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
480
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
481
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
482
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
483
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
484
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
485
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
486
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
487
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
488
                if(numEq > c->ppMode.flatnessThreshold){
489
                        int min, max, x;
490

    
491
                        if(src[0] > src[step]){
492
                            max= src[0];
493
                            min= src[step];
494
                        }else{
495
                            max= src[step];
496
                            min= src[0];
497
                        }
498
                        for(x=2; x<8; x+=2){
499
                                if(src[x*step] > src[(x+1)*step]){
500
                                        if(src[x    *step] > max) max= src[ x   *step];
501
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
502
                                }else{
503
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
504
                                        if(src[ x   *step] < min) min= src[ x   *step];
505
                                }
506
                        }
507
                        if(max-min < 2*QP){
508
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
509
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
510

    
511
                                int sums[10];
512
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
513
                                sums[1] = sums[0] - first       + src[3*step];
514
                                sums[2] = sums[1] - first       + src[4*step];
515
                                sums[3] = sums[2] - first       + src[5*step];
516
                                sums[4] = sums[3] - first       + src[6*step];
517
                                sums[5] = sums[4] - src[0*step] + src[7*step];
518
                                sums[6] = sums[5] - src[1*step] + last;
519
                                sums[7] = sums[6] - src[2*step] + last;
520
                                sums[8] = sums[7] - src[3*step] + last;
521
                                sums[9] = sums[8] - src[4*step] + last;
522

    
523
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
524
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
525
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
526
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
527
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
528
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
529
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
530
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
531
                        }
532
                }else{
533
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
534

    
535
                        if(FFABS(middleEnergy) < 8*QP)
536
                        {
537
                                const int q=(src[3*step] - src[4*step])/2;
538
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
539
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
540

    
541
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
542
                                d= FFMAX(d, 0);
543

    
544
                                d= (5*d + 32) >> 6;
545
                                d*= FFSIGN(-middleEnergy);
546

    
547
                                if(q>0)
548
                                {
549
                                        d= d<0 ? 0 : d;
550
                                        d= d>q ? q : d;
551
                                }
552
                                else
553
                                {
554
                                        d= d>0 ? 0 : d;
555
                                        d= d<q ? q : d;
556
                                }
557

    
558
                                src[3*step]-= d;
559
                                src[4*step]+= d;
560
                        }
561
                }
562

    
563
                src += stride;
564
        }
565
/*if(step==16){
566
    STOP_TIMER("step16")
567
}else{
568
    STOP_TIMER("stepX")
569
}*/
570
}
571

    
572
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
573
//Plain C versions
574
#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
575
#define COMPILE_C
576
#endif
577

    
578
#ifdef ARCH_POWERPC
579
#ifdef HAVE_ALTIVEC
580
#define COMPILE_ALTIVEC
581
#endif //HAVE_ALTIVEC
582
#endif //ARCH_POWERPC
583

    
584
#if defined(ARCH_X86)
585

    
586
#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
587
#define COMPILE_MMX
588
#endif
589

    
590
#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
591
#define COMPILE_MMX2
592
#endif
593

    
594
#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
595
#define COMPILE_3DNOW
596
#endif
597
#endif /* defined(ARCH_X86) */
598

    
599
#undef HAVE_MMX
600
#undef HAVE_MMX2
601
#undef HAVE_3DNOW
602
#undef HAVE_ALTIVEC
603

    
604
#ifdef COMPILE_C
605
#undef HAVE_MMX
606
#undef HAVE_MMX2
607
#undef HAVE_3DNOW
608
#define RENAME(a) a ## _C
609
#include "postprocess_template.c"
610
#endif
611

    
612
#ifdef ARCH_POWERPC
613
#ifdef COMPILE_ALTIVEC
614
#undef RENAME
615
#define HAVE_ALTIVEC
616
#define RENAME(a) a ## _altivec
617
#include "postprocess_altivec_template.c"
618
#include "postprocess_template.c"
619
#endif
620
#endif //ARCH_POWERPC
621

    
622
//MMX versions
623
#ifdef COMPILE_MMX
624
#undef RENAME
625
#define HAVE_MMX
626
#undef HAVE_MMX2
627
#undef HAVE_3DNOW
628
#define RENAME(a) a ## _MMX
629
#include "postprocess_template.c"
630
#endif
631

    
632
//MMX2 versions
633
#ifdef COMPILE_MMX2
634
#undef RENAME
635
#define HAVE_MMX
636
#define HAVE_MMX2
637
#undef HAVE_3DNOW
638
#define RENAME(a) a ## _MMX2
639
#include "postprocess_template.c"
640
#endif
641

    
642
//3DNOW versions
643
#ifdef COMPILE_3DNOW
644
#undef RENAME
645
#define HAVE_MMX
646
#undef HAVE_MMX2
647
#define HAVE_3DNOW
648
#define RENAME(a) a ## _3DNow
649
#include "postprocess_template.c"
650
#endif
651

    
652
// Minor note: the HAVE_xyz macros are messed up after this point, so don't rely on them
653

    
654
static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
655
        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
656
{
657
        PPContext *c= (PPContext *)vc;
658
        PPMode *ppMode= (PPMode *)vm;
659
        c->ppMode= *ppMode; //FIXME
660

    
661
        // useing ifs here as they are faster than function pointers allthough the
662
        // difference wouldnt be messureable here but its much better because
663
        // someone might exchange the cpu whithout restarting mplayer ;)
664
#ifdef RUNTIME_CPUDETECT
665
#if defined(ARCH_X86)
666
        // ordered per speed fasterst first
667
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
668
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
670
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
672
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673
        else
674
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675
#else
676
#ifdef ARCH_POWERPC
677
#ifdef HAVE_ALTIVEC
678
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
679
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680
        else
681
#endif
682
#endif
683
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684
#endif
685
#else //RUNTIME_CPUDETECT
686
#ifdef HAVE_MMX2
687
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688
#elif defined (HAVE_3DNOW)
689
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690
#elif defined (HAVE_MMX)
691
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
692
#elif defined (HAVE_ALTIVEC)
693
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
694
#else
695
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
696
#endif
697
#endif //!RUNTIME_CPUDETECT
698
}
699

    
700
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
701
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
702

    
703
/**
 * Help text for the -pp command line option: the table of available
 * postprocessing filters with their options, followed by usage examples.
 */
char *pp_help=
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forceQuant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
748

    
749
pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
750
{
751
        char temp[GET_MODE_BUFFER_SIZE];
752
        char *p= temp;
753
        const char *filterDelimiters= ",/";
754
        const char *optionDelimiters= ":";
755
        struct PPMode *ppMode;
756
        char *filterToken;
757

    
758
        ppMode= av_malloc(sizeof(PPMode));
759

    
760
        ppMode->lumMode= 0;
761
        ppMode->chromMode= 0;
762
        ppMode->maxTmpNoise[0]= 700;
763
        ppMode->maxTmpNoise[1]= 1500;
764
        ppMode->maxTmpNoise[2]= 3000;
765
        ppMode->maxAllowedY= 234;
766
        ppMode->minAllowedY= 16;
767
        ppMode->baseDcDiff= 256/8;
768
        ppMode->flatnessThreshold= 56-16-1;
769
        ppMode->maxClippedThreshold= 0.01;
770
        ppMode->error=0;
771

    
772
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
773

    
774
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
775

    
776
        for(;;){
777
                char *filterName;
778
                int q= 1000000; //PP_QUALITY_MAX;
779
                int chrom=-1;
780
                int luma=-1;
781
                char *option;
782
                char *options[OPTIONS_ARRAY_SIZE];
783
                int i;
784
                int filterNameOk=0;
785
                int numOfUnknownOptions=0;
786
                int enable=1; //does the user want us to enabled or disabled the filter
787

    
788
                filterToken= strtok(p, filterDelimiters);
789
                if(filterToken == NULL) break;
790
                p+= strlen(filterToken) + 1; // p points to next filterToken
791
                filterName= strtok(filterToken, optionDelimiters);
792
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
793

    
794
                if(*filterName == '-')
795
                {
796
                        enable=0;
797
                        filterName++;
798
                }
799

    
800
                for(;;){ //for all options
801
                        option= strtok(NULL, optionDelimiters);
802
                        if(option == NULL) break;
803

    
804
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
805
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
806
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
807
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
808
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
809
                        else
810
                        {
811
                                options[numOfUnknownOptions] = option;
812
                                numOfUnknownOptions++;
813
                        }
814
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
815
                }
816
                options[numOfUnknownOptions] = NULL;
817

    
818
                /* replace stuff from the replace Table */
819
                for(i=0; replaceTable[2*i]!=NULL; i++)
820
                {
821
                        if(!strcmp(replaceTable[2*i], filterName))
822
                        {
823
                                int newlen= strlen(replaceTable[2*i + 1]);
824
                                int plen;
825
                                int spaceLeft;
826

    
827
                                if(p==NULL) p= temp, *p=0;      //last filter
828
                                else p--, *p=',';               //not last filter
829

    
830
                                plen= strlen(p);
831
                                spaceLeft= p - temp + plen;
832
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
833
                                {
834
                                        ppMode->error++;
835
                                        break;
836
                                }
837
                                memmove(p + newlen, p, plen+1);
838
                                memcpy(p, replaceTable[2*i + 1], newlen);
839
                                filterNameOk=1;
840
                        }
841
                }
842

    
843
                for(i=0; filters[i].shortName!=NULL; i++)
844
                {
845
                        if(   !strcmp(filters[i].longName, filterName)
846
                           || !strcmp(filters[i].shortName, filterName))
847
                        {
848
                                ppMode->lumMode &= ~filters[i].mask;
849
                                ppMode->chromMode &= ~filters[i].mask;
850

    
851
                                filterNameOk=1;
852
                                if(!enable) break; // user wants to disable it
853

    
854
                                if(q >= filters[i].minLumQuality && luma)
855
                                        ppMode->lumMode|= filters[i].mask;
856
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
857
                                        if(q >= filters[i].minChromQuality)
858
                                                ppMode->chromMode|= filters[i].mask;
859

    
860
                                if(filters[i].mask == LEVEL_FIX)
861
                                {
862
                                        int o;
863
                                        ppMode->minAllowedY= 16;
864
                                        ppMode->maxAllowedY= 234;
865
                                        for(o=0; options[o]!=NULL; o++)
866
                                        {
867
                                                if(  !strcmp(options[o],"fullyrange")
868
                                                   ||!strcmp(options[o],"f"))
869
                                                {
870
                                                        ppMode->minAllowedY= 0;
871
                                                        ppMode->maxAllowedY= 255;
872
                                                        numOfUnknownOptions--;
873
                                                }
874
                                        }
875
                                }
876
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
877
                                {
878
                                        int o;
879
                                        int numOfNoises=0;
880

    
881
                                        for(o=0; options[o]!=NULL; o++)
882
                                        {
883
                                                char *tail;
884
                                                ppMode->maxTmpNoise[numOfNoises]=
885
                                                        strtol(options[o], &tail, 0);
886
                                                if(tail!=options[o])
887
                                                {
888
                                                        numOfNoises++;
889
                                                        numOfUnknownOptions--;
890
                                                        if(numOfNoises >= 3) break;
891
                                                }
892
                                        }
893
                                }
894
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
895
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
896
                                {
897
                                        int o;
898

    
899
                                        for(o=0; options[o]!=NULL && o<2; o++)
900
                                        {
901
                                                char *tail;
902
                                                int val= strtol(options[o], &tail, 0);
903
                                                if(tail==options[o]) break;
904

    
905
                                                numOfUnknownOptions--;
906
                                                if(o==0) ppMode->baseDcDiff= val;
907
                                                else ppMode->flatnessThreshold= val;
908
                                        }
909
                                }
910
                                else if(filters[i].mask == FORCE_QUANT)
911
                                {
912
                                        int o;
913
                                        ppMode->forcedQuant= 15;
914

    
915
                                        for(o=0; options[o]!=NULL && o<1; o++)
916
                                        {
917
                                                char *tail;
918
                                                int val= strtol(options[o], &tail, 0);
919
                                                if(tail==options[o]) break;
920

    
921
                                                numOfUnknownOptions--;
922
                                                ppMode->forcedQuant= val;
923
                                        }
924
                                }
925
                        }
926
                }
927
                if(!filterNameOk) ppMode->error++;
928
                ppMode->error += numOfUnknownOptions;
929
        }
930

    
931
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
932
        if(ppMode->error)
933
        {
934
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
935
                av_free(ppMode);
936
                return NULL;
937
        }
938
        return ppMode;
939
}
940

    
941
/**
 * Releases a mode returned by pp_get_mode_by_name_and_quality().
 *
 * @param mode mode to release; it is handed directly to av_free()
 */
void pp_free_mode(pp_mode_t *mode)
{
    av_free(mode);
}
944

    
945
/**
 * Frees the buffer *p and replaces it with a new buffer of size bytes
 * obtained from av_mallocz(). The old contents are not preserved.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size)
{
        av_free(*p);
        *p= av_mallocz(size);
}
949

    
950
/**
 * (Re)allocates every working buffer of the context and records the strides
 * the buffers were sized for in c->stride and c->qpStride.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size the temporary picture buffers are sized for
 * @param qpStride line size the QP tables are sized for
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
        // picture size in 16x16 blocks, rounded up
        int mbWidth = (width+15)>>4;
        int mbHeight= (height+15)>>4;
        int i;

        c->stride= stride;
        c->qpStride= qpStride;

        reallocAlign((void **)&c->tempDst, 8, stride*24);
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
        // start with the same count in every histogram bin
        for(i=0; i<256; i++)
                c->yHistogram[i]= width*height/64*15/256;

        for(i=0; i<3; i++)
        {
                //Note: the +17*1024 is just there so I don't have to worry about r/w over the end
                reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
                reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
        }

        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
977

    
978
static void global_init(void){
979
        int i;
980
        memset(clip_table, 0, 256);
981
        for(i=256; i<512; i++)
982
                clip_table[i]= i;
983
        memset(clip_table+512, 0, 256);
984
}
985

    
986
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr) {
    return "postproc";
}
989

    
990
/* Class descriptor stored in PPContext.av_class by pp_get_context(); the
   context is later passed as the first argument of av_log(). Its name
   callback is context_to_name(). */
static AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
991

    
992
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
993
        PPContext *c= av_malloc(sizeof(PPContext));
994
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
995
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
996

    
997
        global_init();
998

    
999
        memset(c, 0, sizeof(PPContext));
1000
        c->av_class = &av_codec_context_class;
1001
        c->cpuCaps= cpuCaps;
1002
        if(cpuCaps&PP_FORMAT){
1003
                c->hChromaSubSample= cpuCaps&0x3;
1004
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
1005
        }else{
1006
                c->hChromaSubSample= 1;
1007
                c->vChromaSubSample= 1;
1008
        }
1009

    
1010
        reallocBuffers(c, width, height, stride, qpStride);
1011

    
1012
        c->frameNum=-1;
1013

    
1014
        return c;
1015
}
1016

    
1017
void pp_free_context(void *vc){
1018
        PPContext *c = (PPContext*)vc;
1019
        int i;
1020

    
1021
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1022
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1023

    
1024
        av_free(c->tempBlocks);
1025
        av_free(c->yHistogram);
1026
        av_free(c->tempDst);
1027
        av_free(c->tempSrc);
1028
        av_free(c->deintTemp);
1029
        av_free(c->stdQPTable);
1030
        av_free(c->nonBQPTable);
1031
        av_free(c->forcedQPTable);
1032

    
1033
        memset(c, 0, sizeof(PPContext));
1034

    
1035
        av_free(c);
1036
}
1037

    
1038
void  pp_postprocess(uint8_t * src[3], int srcStride[3],
1039
                 uint8_t * dst[3], int dstStride[3],
1040
                 int width, int height,
1041
                 QP_STORE_T *QP_store,  int QPStride,
1042
                 pp_mode_t *vm,  void *vc, int pict_type)
1043
{
1044
        int mbWidth = (width+15)>>4;
1045
        int mbHeight= (height+15)>>4;
1046
        PPMode *mode = (PPMode*)vm;
1047
        PPContext *c = (PPContext*)vc;
1048
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1049
        int absQPStride = FFABS(QPStride);
1050

    
1051
        // c->stride and c->QPStride are always positive
1052
        if(c->stride < minStride || c->qpStride < absQPStride)
1053
                reallocBuffers(c, width, height,
1054
                                FFMAX(minStride, c->stride),
1055
                                FFMAX(c->qpStride, absQPStride));
1056

    
1057
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1058
        {
1059
                int i;
1060
                QP_store= c->forcedQPTable;
1061
                absQPStride = QPStride = 0;
1062
                if(mode->lumMode & FORCE_QUANT)
1063
                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
1064
                else
1065
                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
1066
        }
1067

    
1068
        if(pict_type & PP_PICT_TYPE_QP2){
1069
                int i;
1070
                const int count= mbHeight * absQPStride;
1071
                for(i=0; i<(count>>2); i++){
1072
                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1073
                }
1074
                for(i<<=2; i<count; i++){
1075
                        c->stdQPTable[i] = QP_store[i]>>1;
1076
                }
1077
                QP_store= c->stdQPTable;
1078
                QPStride= absQPStride;
1079
        }
1080

    
1081
if(0){
1082
int x,y;
1083
for(y=0; y<mbHeight; y++){
1084
        for(x=0; x<mbWidth; x++){
1085
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1086
        }
1087
        av_log(c, AV_LOG_INFO, "\n");
1088
}
1089
        av_log(c, AV_LOG_INFO, "\n");
1090
}
1091

    
1092
        if((pict_type&7)!=3)
1093
        {
1094
                if (QPStride >= 0) {
1095
                        int i;
1096
                        const int count= mbHeight * QPStride;
1097
                        for(i=0; i<(count>>2); i++){
1098
                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1099
                        }
1100
                        for(i<<=2; i<count; i++){
1101
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1102
                        }
1103
                } else {
1104
                        int i,j;
1105
                        for(i=0; i<mbHeight; i++) {
1106
                                    for(j=0; j<absQPStride; j++) {
1107
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1108
                                }
1109
                        }
1110
                }
1111
        }
1112

    
1113
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1114
               mode->lumMode, mode->chromMode);
1115

    
1116
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1117
                width, height, QP_store, QPStride, 0, mode, c);
1118

    
1119
        width  = (width )>>c->hChromaSubSample;
1120
        height = (height)>>c->vChromaSubSample;
1121

    
1122
        if(mode->chromMode)
1123
        {
1124
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1125
                        width, height, QP_store, QPStride, 1, mode, c);
1126
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1127
                        width, height, QP_store, QPStride, 2, mode, c);
1128
        }
1129
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1130
        {
1131
                linecpy(dst[1], src[1], height, srcStride[1]);
1132
                linecpy(dst[2], src[2], height, srcStride[2]);
1133
        }
1134
        else
1135
        {
1136
                int y;
1137
                for(y=0; y<height; y++)
1138
                {
1139
                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1140
                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1141
                }
1142
        }
1143
}
1144