Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ bbea3555

History | View | Annotate | Download (37.3 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file postprocess.c
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use the Subversion log
75

    
76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
//#undef HAVE_MMX2
83
//#define HAVE_3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
88
#include "postprocess_internal.h"
89

    
90
unsigned postproc_version(void)
91
{
92
    return LIBPOSTPROC_VERSION_INT;
93
}
94

    
95
#ifdef HAVE_ALTIVEC_H
96
#include <altivec.h>
97
#endif
98

    
99
#define GET_MODE_BUFFER_SIZE 500
100
#define OPTIONS_ARRAY_SIZE 10
101
#define BLOCK_SIZE 8
102
#define TEMP_STRIDE 8
103
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
104

    
105
#if defined(ARCH_X86)
106
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
107
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
108
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
109
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
110
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
111
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
112
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
113
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
114
#endif
115

    
116
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
117

    
118

    
119
static struct PPFilter filters[]=
120
{
121
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
122
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
123
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
124
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
125
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
126
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
127
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
128
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
129
    {"dr", "dering",                1, 5, 6, DERING},
130
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
131
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
132
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
133
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
134
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
135
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
136
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
137
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
138
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
139
    {NULL, NULL,0,0,0,0} //End Marker
140
};
141

    
142
/**
 * Alias table: pairs of { alias name, filter string it expands to }.
 * Aliases sit at even indices; the list ends at the first NULL alias.
 */
static const char *const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
151

    
152

    
153
#if defined(ARCH_X86)
/* Thin wrappers that each emit a single x86 prefetch instruction for the
 * address p. They return nothing and do not read or write *p from C. */

/** Emit "prefetchnta" for address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit "prefetcht0" for address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit "prefetcht1" for address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit "prefetcht2" for address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
182

    
183
/* The horizontal functions exist only in C because the MMX
184
 * code is faster with vertical filters and transposing. */
185

    
186
/**
187
 * Check if the given 8x8 Block is mostly "flat"
188
 */
189
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
190
{
191
    int numEq= 0;
192
    int y;
193
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
194
    const int dcThreshold= dcOffset*2 + 1;
195

    
196
    for(y=0; y<BLOCK_SIZE; y++){
197
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
198
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
199
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
200
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
201
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
202
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
203
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
204
        src+= stride;
205
    }
206
    return numEq > c->ppMode.flatnessThreshold;
207
}
208

    
209
/**
210
 * Check if the middle 8x8 Block in the given 8x16 block is flat
211
 */
212
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
213
{
214
    int numEq= 0;
215
    int y;
216
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
217
    const int dcThreshold= dcOffset*2 + 1;
218

    
219
    src+= stride*4; // src points to begin of the 8x8 Block
220
    for(y=0; y<BLOCK_SIZE-1; y++){
221
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
222
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
223
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
224
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
225
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
226
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
227
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
228
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
229
        src+= stride;
230
    }
231
    return numEq > c->ppMode.flatnessThreshold;
232
}
233

    
234
/**
 * Approximate check that the horizontal range of an 8x8 block is small.
 *
 * Instead of computing min/max per row, one pixel pair is sampled on each of
 * the 8 rows. The column pairs cycle through (0,5), (2,7), (4,1), (6,3).
 * The block passes if every sampled difference lies within +-2*QP.
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int y;

    for(y=0; y<8; y++){
        const uint8_t *p= pairs[y & 3];
        // one unsigned compare tests -2*QP <= diff <= 2*QP
        if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > (unsigned)(4*QP)) return 0;
        src += stride;
    }
    return 1;
}
256

    
257
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
258
{
259
#if 1
260
#if 1
261
    int x;
262
    src+= stride*4;
263
    for(x=0; x<BLOCK_SIZE; x+=4){
264
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
265
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
266
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
267
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
268
    }
269
#else
270
    int x;
271
    src+= stride*3;
272
    for(x=0; x<BLOCK_SIZE; x++){
273
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
274
    }
275
#endif
276
    return 1;
277
#else
278
    int x;
279
    src+= stride*4;
280
    for(x=0; x<BLOCK_SIZE; x++){
281
        int min=255;
282
        int max=0;
283
        int y;
284
        for(y=0; y<8; y++){
285
            int v= src[x + y*stride];
286
            if(v>max) max=v;
287
            if(v<min) min=v;
288
        }
289
        if(max-min > 2*QP) return 0;
290
    }
291
    return 1;
292
#endif
293
}
294

    
295
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts its range,
 *         0 if it is flat but the range check fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
306

    
307
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts its range,
 *         0 if it is flat but the range check fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
318

    
319
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows, the step between dst[3] and dst[4] is
 * reduced when the "middle energy" across that edge is below 8*QP. The
 * correction is limited to half of the original step and never reverses it.
 *
 * @param dst    first pixel of the 8 pixel wide row segment, modified in place
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int y;
    for(y=0; y<BLOCK_SIZE; y++){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);

        if(FFABS(middleEnergy) < 8*c->QP){
            const int q=(dst[3] - dst[4])/2;
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
            d= FFMAX(d, 0);

            d= (5*d + 32) >> 6;
            d*= FFSIGN(-middleEnergy);

            // clamp d into the range between 0 and q (q may be negative)
            if(q>0)
            {
                d= d<0 ? 0 : d;
                d= d>q ? q : d;
            }
            else
            {
                d= d>0 ? 0 : d;
                d= d<q ? q : d;
            }

            dst[3]-= d;
            dst[4]+= d;
        }
        dst+= stride;
    }
}
353

    
354
/**
355
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
356
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
357
 */
358
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
359
{
360
    int y;
361
    for(y=0; y<BLOCK_SIZE; y++){
362
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
363
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
364

    
365
        int sums[10];
366
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
367
        sums[1] = sums[0] - first  + dst[3];
368
        sums[2] = sums[1] - first  + dst[4];
369
        sums[3] = sums[2] - first  + dst[5];
370
        sums[4] = sums[3] - first  + dst[6];
371
        sums[5] = sums[4] - dst[0] + dst[7];
372
        sums[6] = sums[5] - dst[1] + last;
373
        sums[7] = sums[6] - dst[2] + last;
374
        sums[8] = sums[7] - dst[3] + last;
375
        sums[9] = sums[8] - dst[4] + last;
376

    
377
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
378
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
379
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
380
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
381
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
382
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
383
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
384
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
385

    
386
        dst+= stride;
387
    }
388
}
389

    
390
/**
391
 * Experimental Filter 1 (Horizontal)
392
 * will not damage linear gradients
393
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
394
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
395
 * MMX2 version does correct clipping C version does not
396
 * not identical with the vertical one
397
 */
398
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
399
{
400
    int y;
401
    static uint64_t *lut= NULL;
402
    if(lut==NULL)
403
    {
404
        int i;
405
        lut = av_malloc(256*8);
406
        for(i=0; i<256; i++)
407
        {
408
            int v= i < 128 ? 2*i : 2*(i-256);
409
/*
410
//Simulate 112242211 9-Tap filter
411
            uint64_t a= (v/16)  & 0xFF;
412
            uint64_t b= (v/8)   & 0xFF;
413
            uint64_t c= (v/4)   & 0xFF;
414
            uint64_t d= (3*v/8) & 0xFF;
415
*/
416
//Simulate piecewise linear interpolation
417
            uint64_t a= (v/16)   & 0xFF;
418
            uint64_t b= (v*3/16) & 0xFF;
419
            uint64_t c= (v*5/16) & 0xFF;
420
            uint64_t d= (7*v/16) & 0xFF;
421
            uint64_t A= (0x100 - a)&0xFF;
422
            uint64_t B= (0x100 - b)&0xFF;
423
            uint64_t C= (0x100 - c)&0xFF;
424
            uint64_t D= (0x100 - c)&0xFF;
425

    
426
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
427
                       (D<<24) | (C<<16) | (B<<8)  | (A);
428
            //lut[i] = (v<<32) | (v<<24);
429
        }
430
    }
431

    
432
    for(y=0; y<BLOCK_SIZE; y++){
433
        int a= src[1] - src[2];
434
        int b= src[3] - src[4];
435
        int c= src[5] - src[6];
436

    
437
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
438

    
439
        if(d < QP){
440
            int v = d * FFSIGN(-b);
441

    
442
            src[1] +=v/8;
443
            src[2] +=v/4;
444
            src[3] +=3*v/8;
445
            src[4] -=3*v/8;
446
            src[5] -=v/4;
447
            src[6] -=v/8;
448
        }
449
        src+=stride;
450
    }
451
}
452

    
453
/**
454
 * accurate deblock filter
455
 */
456
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
457
    int y;
458
    const int QP= c->QP;
459
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
460
    const int dcThreshold= dcOffset*2 + 1;
461
//START_TIMER
462
    src+= step*4; // src points to begin of the 8x8 Block
463
    for(y=0; y<8; y++){
464
        int numEq= 0;
465

    
466
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
467
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
468
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
469
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
470
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
471
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
472
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
473
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
474
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
475
        if(numEq > c->ppMode.flatnessThreshold){
476
            int min, max, x;
477

    
478
            if(src[0] > src[step]){
479
                max= src[0];
480
                min= src[step];
481
            }else{
482
                max= src[step];
483
                min= src[0];
484
            }
485
            for(x=2; x<8; x+=2){
486
                if(src[x*step] > src[(x+1)*step]){
487
                        if(src[x    *step] > max) max= src[ x   *step];
488
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
489
                }else{
490
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
491
                        if(src[ x   *step] < min) min= src[ x   *step];
492
                }
493
            }
494
            if(max-min < 2*QP){
495
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
496
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
497

    
498
                int sums[10];
499
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
500
                sums[1] = sums[0] - first       + src[3*step];
501
                sums[2] = sums[1] - first       + src[4*step];
502
                sums[3] = sums[2] - first       + src[5*step];
503
                sums[4] = sums[3] - first       + src[6*step];
504
                sums[5] = sums[4] - src[0*step] + src[7*step];
505
                sums[6] = sums[5] - src[1*step] + last;
506
                sums[7] = sums[6] - src[2*step] + last;
507
                sums[8] = sums[7] - src[3*step] + last;
508
                sums[9] = sums[8] - src[4*step] + last;
509

    
510
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
511
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
512
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
513
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
514
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
515
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
516
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
517
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
518
            }
519
        }else{
520
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
521

    
522
            if(FFABS(middleEnergy) < 8*QP){
523
                const int q=(src[3*step] - src[4*step])/2;
524
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
525
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
526

    
527
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
528
                d= FFMAX(d, 0);
529

    
530
                d= (5*d + 32) >> 6;
531
                d*= FFSIGN(-middleEnergy);
532

    
533
                if(q>0){
534
                    d= d<0 ? 0 : d;
535
                    d= d>q ? q : d;
536
                }else{
537
                    d= d>0 ? 0 : d;
538
                    d= d<q ? q : d;
539
                }
540

    
541
                src[3*step]-= d;
542
                src[4*step]+= d;
543
            }
544
        }
545

    
546
        src += stride;
547
    }
548
/*if(step==16){
549
    STOP_TIMER("step16")
550
}else{
551
    STOP_TIMER("stepX")
552
}*/
553
}
554

    
555
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one

/* Step 1: decide which variants of postprocess_template.c get compiled.
 * With RUNTIME_CPUDETECT, C and (on x86) all x86 variants are built.
 * Without it, only the variant matching the configured HAVE_* is built. */

//Plain C versions
#if !(defined (HAVE_MMX) || defined (HAVE_ALTIVEC)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_C
#endif

#ifdef HAVE_ALTIVEC
#define COMPILE_ALTIVEC
#endif //HAVE_ALTIVEC

#if defined(ARCH_X86)

#if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX
#endif

#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
#define COMPILE_MMX2
#endif

#if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
#define COMPILE_3DNOW
#endif
#endif /* defined(ARCH_X86) */

/* Step 2: clear the configured HAVE_* so each inclusion below can set
 * exactly the feature macros of the variant it instantiates. */
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#undef HAVE_ALTIVEC

/* Step 3: include the template once per variant; RENAME() appends the
 * variant suffix to every templated function name. */
#ifdef COMPILE_C
#undef HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

#ifdef COMPILE_ALTIVEC
#undef RENAME
#define HAVE_ALTIVEC
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

//MMX versions
#ifdef COMPILE_MMX
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

//MMX2 versions
#ifdef COMPILE_MMX2
#undef RENAME
#define HAVE_MMX
#define HAVE_MMX2
#undef HAVE_3DNOW
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

//3DNOW versions
#ifdef COMPILE_3DNOW
#undef RENAME
#define HAVE_MMX
#undef HAVE_MMX2
#define HAVE_3DNOW
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
630

    
631
// minor note: the HAVE_xyz is messed up after that line so do not use it.
632

    
633
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
634
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
635
{
636
    PPContext *c= (PPContext *)vc;
637
    PPMode *ppMode= (PPMode *)vm;
638
    c->ppMode= *ppMode; //FIXME
639

    
640
    // Using ifs here as they are faster than function pointers although the
641
    // difference would not be measurable here but it is much better because
642
    // someone might exchange the CPU whithout restarting MPlayer ;)
643
#ifdef RUNTIME_CPUDETECT
644
#if defined(ARCH_X86)
645
    // ordered per speed fastest first
646
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
647
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
648
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
649
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
650
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
651
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
652
    else
653
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
654
#else
655
#ifdef HAVE_ALTIVEC
656
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
657
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
658
    else
659
#endif
660
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661
#endif
662
#else //RUNTIME_CPUDETECT
663
#ifdef HAVE_MMX2
664
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
665
#elif defined (HAVE_3DNOW)
666
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667
#elif defined (HAVE_MMX)
668
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669
#elif defined (HAVE_ALTIVEC)
670
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671
#else
672
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
673
#endif
674
#endif //!RUNTIME_CPUDETECT
675
}
676

    
677
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
678
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
679

    
680
/* -pp Command line Help
681
*/
682
/* Help text for the filter string syntax. The short and long names listed
 * here must match the names in the filters[] and replaceTable[] tables,
 * which are compared case-sensitively. The declaration changes from a
 * pointer to an array starting with major version 52. */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
729

    
730
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
731
{
732
    char temp[GET_MODE_BUFFER_SIZE];
733
    char *p= temp;
734
    static const char filterDelimiters[] = ",/";
735
    static const char optionDelimiters[] = ":";
736
    struct PPMode *ppMode;
737
    char *filterToken;
738

    
739
    ppMode= av_malloc(sizeof(PPMode));
740

    
741
    ppMode->lumMode= 0;
742
    ppMode->chromMode= 0;
743
    ppMode->maxTmpNoise[0]= 700;
744
    ppMode->maxTmpNoise[1]= 1500;
745
    ppMode->maxTmpNoise[2]= 3000;
746
    ppMode->maxAllowedY= 234;
747
    ppMode->minAllowedY= 16;
748
    ppMode->baseDcDiff= 256/8;
749
    ppMode->flatnessThreshold= 56-16-1;
750
    ppMode->maxClippedThreshold= 0.01;
751
    ppMode->error=0;
752

    
753
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
754

    
755
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
756

    
757
    for(;;){
758
        char *filterName;
759
        int q= 1000000; //PP_QUALITY_MAX;
760
        int chrom=-1;
761
        int luma=-1;
762
        char *option;
763
        char *options[OPTIONS_ARRAY_SIZE];
764
        int i;
765
        int filterNameOk=0;
766
        int numOfUnknownOptions=0;
767
        int enable=1; //does the user want us to enabled or disabled the filter
768

    
769
        filterToken= strtok(p, filterDelimiters);
770
        if(filterToken == NULL) break;
771
        p+= strlen(filterToken) + 1; // p points to next filterToken
772
        filterName= strtok(filterToken, optionDelimiters);
773
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
774

    
775
        if(*filterName == '-'){
776
            enable=0;
777
            filterName++;
778
        }
779

    
780
        for(;;){ //for all options
781
            option= strtok(NULL, optionDelimiters);
782
            if(option == NULL) break;
783

    
784
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
785
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
786
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
787
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
788
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
789
            else{
790
                options[numOfUnknownOptions] = option;
791
                numOfUnknownOptions++;
792
            }
793
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
794
        }
795
        options[numOfUnknownOptions] = NULL;
796

    
797
        /* replace stuff from the replace Table */
798
        for(i=0; replaceTable[2*i]!=NULL; i++){
799
            if(!strcmp(replaceTable[2*i], filterName)){
800
                int newlen= strlen(replaceTable[2*i + 1]);
801
                int plen;
802
                int spaceLeft;
803

    
804
                if(p==NULL) p= temp, *p=0;      //last filter
805
                else p--, *p=',';               //not last filter
806

    
807
                plen= strlen(p);
808
                spaceLeft= p - temp + plen;
809
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
810
                    ppMode->error++;
811
                    break;
812
                }
813
                memmove(p + newlen, p, plen+1);
814
                memcpy(p, replaceTable[2*i + 1], newlen);
815
                filterNameOk=1;
816
            }
817
        }
818

    
819
        for(i=0; filters[i].shortName!=NULL; i++){
820
            if(   !strcmp(filters[i].longName, filterName)
821
               || !strcmp(filters[i].shortName, filterName)){
822
                ppMode->lumMode &= ~filters[i].mask;
823
                ppMode->chromMode &= ~filters[i].mask;
824

    
825
                filterNameOk=1;
826
                if(!enable) break; // user wants to disable it
827

    
828
                if(q >= filters[i].minLumQuality && luma)
829
                    ppMode->lumMode|= filters[i].mask;
830
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
831
                    if(q >= filters[i].minChromQuality)
832
                            ppMode->chromMode|= filters[i].mask;
833

    
834
                if(filters[i].mask == LEVEL_FIX){
835
                    int o;
836
                    ppMode->minAllowedY= 16;
837
                    ppMode->maxAllowedY= 234;
838
                    for(o=0; options[o]!=NULL; o++){
839
                        if(  !strcmp(options[o],"fullyrange")
840
                           ||!strcmp(options[o],"f")){
841
                            ppMode->minAllowedY= 0;
842
                            ppMode->maxAllowedY= 255;
843
                            numOfUnknownOptions--;
844
                        }
845
                    }
846
                }
847
                else if(filters[i].mask == TEMP_NOISE_FILTER)
848
                {
849
                    int o;
850
                    int numOfNoises=0;
851

    
852
                    for(o=0; options[o]!=NULL; o++){
853
                        char *tail;
854
                        ppMode->maxTmpNoise[numOfNoises]=
855
                            strtol(options[o], &tail, 0);
856
                        if(tail!=options[o]){
857
                            numOfNoises++;
858
                            numOfUnknownOptions--;
859
                            if(numOfNoises >= 3) break;
860
                        }
861
                    }
862
                }
863
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
864
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
865
                    int o;
866

    
867
                    for(o=0; options[o]!=NULL && o<2; o++){
868
                        char *tail;
869
                        int val= strtol(options[o], &tail, 0);
870
                        if(tail==options[o]) break;
871

    
872
                        numOfUnknownOptions--;
873
                        if(o==0) ppMode->baseDcDiff= val;
874
                        else ppMode->flatnessThreshold= val;
875
                    }
876
                }
877
                else if(filters[i].mask == FORCE_QUANT){
878
                    int o;
879
                    ppMode->forcedQuant= 15;
880

    
881
                    for(o=0; options[o]!=NULL && o<1; o++){
882
                        char *tail;
883
                        int val= strtol(options[o], &tail, 0);
884
                        if(tail==options[o]) break;
885

    
886
                        numOfUnknownOptions--;
887
                        ppMode->forcedQuant= val;
888
                    }
889
                }
890
            }
891
        }
892
        if(!filterNameOk) ppMode->error++;
893
        ppMode->error += numOfUnknownOptions;
894
    }
895

    
896
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
897
    if(ppMode->error){
898
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
899
        av_free(ppMode);
900
        return NULL;
901
    }
902
    return ppMode;
903
}
904

    
905
/**
 * Release a mode object.
 *
 * The pointer is handed straight to av_free(); nothing else is torn down.
 *
 * @param mode mode to release
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
908

    
909
/**
 * Swap the buffer referenced by *p for a newly allocated one.
 *
 * The old buffer is released with av_free() (its contents are not carried
 * over) and *p is set to whatever av_mallocz(size) returns.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit; not used here
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p    = fresh;
}
913

    
914
/**
 * (Re)allocate all scratch buffers of a context for a given geometry.
 *
 * Stores stride and qpStride in the context, then replaces every work buffer
 * through reallocAlign(), so previous buffer contents are discarded.
 * Each yHistogram entry is seeded with width*height/64*15/256.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary planes
 * @param qpStride row length used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride)
{
    /* picture size in 16x16 blocks, rounded up */
    const int mbWidth       = (width  + 15) >> 4;
    const int mbHeight      = (height + 15) >> 4;
    const int histogramSeed = width*height/64*15/256;
    int bin;
    int plane;

    c->stride   = stride;
    c->qpStride = qpStride;

    reallocAlign((void **)&c->tempDst,    8, stride*24);
    reallocAlign((void **)&c->tempSrc,    8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

    bin = 0;
    while (bin < 256) {
        c->yHistogram[bin] = histogramSeed;
        bin++;
    }

    for (plane = 0; plane < 3; plane++) {
        /* The extra 17*1024 bytes are slack so that reads/writes past the
         * nominal end of the buffer need no special care. */
        reallocAlign((void **)&c->tempBlurred[plane], 8,
                     stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[plane], 8,
                     256*((height+7)&(~7))/2 + 17*1024); //FIXME size
    }

    reallocAlign((void **)&c->deintTemp,     8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable,   8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable,    8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
940

    
941
/**
 * Name callback referenced by av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char *context_to_name(void *ptr)
{
    static const char name[] = "postproc";

    return name;
}
944

    
945
/* Class descriptor that pp_get_context() stores in PPContext.av_class;
 * its second member is the context_to_name callback. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
946

    
947
pp_context *pp_get_context(int width, int height, int cpuCaps){
948
    PPContext *c= av_malloc(sizeof(PPContext));
949
    int stride= (width+15)&(~15);    //assumed / will realloc if needed
950
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
951

    
952
    memset(c, 0, sizeof(PPContext));
953
    c->av_class = &av_codec_context_class;
954
    c->cpuCaps= cpuCaps;
955
    if(cpuCaps&PP_FORMAT){
956
        c->hChromaSubSample= cpuCaps&0x3;
957
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
958
    }else{
959
        c->hChromaSubSample= 1;
960
        c->vChromaSubSample= 1;
961
    }
962

    
963
    reallocBuffers(c, width, height, stride, qpStride);
964

    
965
    c->frameNum=-1;
966

    
967
    return c;
968
}
969

    
970
void pp_free_context(void *vc){
971
    PPContext *c = (PPContext*)vc;
972
    int i;
973

    
974
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
975
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
976

    
977
    av_free(c->tempBlocks);
978
    av_free(c->yHistogram);
979
    av_free(c->tempDst);
980
    av_free(c->tempSrc);
981
    av_free(c->deintTemp);
982
    av_free(c->stdQPTable);
983
    av_free(c->nonBQPTable);
984
    av_free(c->forcedQPTable);
985

    
986
    memset(c, 0, sizeof(PPContext));
987

    
988
    av_free(c);
989
}
990

    
991
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
992
                     uint8_t * dst[3], const int dstStride[3],
993
                     int width, int height,
994
                     const QP_STORE_T *QP_store,  int QPStride,
995
                     pp_mode *vm,  void *vc, int pict_type)
996
{
997
    int mbWidth = (width+15)>>4;
998
    int mbHeight= (height+15)>>4;
999
    PPMode *mode = (PPMode*)vm;
1000
    PPContext *c = (PPContext*)vc;
1001
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1002
    int absQPStride = FFABS(QPStride);
1003

    
1004
    // c->stride and c->QPStride are always positive
1005
    if(c->stride < minStride || c->qpStride < absQPStride)
1006
        reallocBuffers(c, width, height,
1007
                       FFMAX(minStride, c->stride),
1008
                       FFMAX(c->qpStride, absQPStride));
1009

    
1010
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1011
        int i;
1012
        QP_store= c->forcedQPTable;
1013
        absQPStride = QPStride = 0;
1014
        if(mode->lumMode & FORCE_QUANT)
1015
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1016
        else
1017
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1018
    }
1019

    
1020
    if(pict_type & PP_PICT_TYPE_QP2){
1021
        int i;
1022
        const int count= mbHeight * absQPStride;
1023
        for(i=0; i<(count>>2); i++){
1024
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1025
        }
1026
        for(i<<=2; i<count; i++){
1027
            c->stdQPTable[i] = QP_store[i]>>1;
1028
        }
1029
        QP_store= c->stdQPTable;
1030
        QPStride= absQPStride;
1031
    }
1032

    
1033
    if(0){
1034
        int x,y;
1035
        for(y=0; y<mbHeight; y++){
1036
            for(x=0; x<mbWidth; x++){
1037
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1038
            }
1039
            av_log(c, AV_LOG_INFO, "\n");
1040
        }
1041
        av_log(c, AV_LOG_INFO, "\n");
1042
    }
1043

    
1044
    if((pict_type&7)!=3){
1045
        if (QPStride >= 0){
1046
            int i;
1047
            const int count= mbHeight * QPStride;
1048
            for(i=0; i<(count>>2); i++){
1049
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1050
            }
1051
            for(i<<=2; i<count; i++){
1052
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1053
            }
1054
        } else {
1055
            int i,j;
1056
            for(i=0; i<mbHeight; i++) {
1057
                for(j=0; j<absQPStride; j++) {
1058
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1059
                }
1060
            }
1061
        }
1062
    }
1063

    
1064
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1065
           mode->lumMode, mode->chromMode);
1066

    
1067
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1068
                width, height, QP_store, QPStride, 0, mode, c);
1069

    
1070
    width  = (width )>>c->hChromaSubSample;
1071
    height = (height)>>c->vChromaSubSample;
1072

    
1073
    if(mode->chromMode){
1074
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1075
                    width, height, QP_store, QPStride, 1, mode, c);
1076
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1077
                    width, height, QP_store, QPStride, 2, mode, c);
1078
    }
1079
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1080
        linecpy(dst[1], src[1], height, srcStride[1]);
1081
        linecpy(dst[2], src[2], height, srcStride[2]);
1082
    }else{
1083
        int y;
1084
        for(y=0; y<height; y++){
1085
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1086
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1087
        }
1088
    }
1089
}
1090