Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ 1a5e4fd8

History | View | Annotate | Download (37.4 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of Libav.
7
 *
8
 * Libav is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * Libav is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with Libav; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down))
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use git log
75

    
76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
//#undef HAVE_MMX2
83
//#define HAVE_AMD3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
88
#include "postprocess_internal.h"
89
#include "libavutil/avstring.h"
90

    
91
unsigned postproc_version(void)
92
{
93
    return LIBPOSTPROC_VERSION_INT;
94
}
95

    
96
const char *postproc_configuration(void)
97
{
98
    return LIBAV_CONFIGURATION;
99
}
100

    
101
const char *postproc_license(void)
102
{
103
#define LICENSE_PREFIX "libpostproc license: "
104
    return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
105
}
106

    
107
#if HAVE_ALTIVEC_H
108
#include <altivec.h>
109
#endif
110

    
111
/* Size in bytes of the scratch copy of the mode string that
 * pp_get_mode_by_name_and_quality() parses. */
#define GET_MODE_BUFFER_SIZE    500
/* Capacity of the per-filter array of unrecognized option tokens
 * (one slot is kept for the terminating NULL). */
#define OPTIONS_ARRAY_SIZE      10
/* Edge length, in pixels, of the blocks the filters work on. */
#define BLOCK_SIZE              8
#define TEMP_STRIDE             8
/* NUM_BLOCKS_AT_ONCE (16) is not used yet. */
116

    
117
#if ARCH_X86
118
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
119
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
120
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
121
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
122
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
123
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
124
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
125
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
126
#endif
127

    
128
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
129

    
130

    
131
static struct PPFilter filters[]=
132
{
133
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
134
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
135
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
136
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
137
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
138
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
139
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
140
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
141
    {"dr", "dering",                1, 5, 6, DERING},
142
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
143
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
144
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
145
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
146
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
147
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
148
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
149
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
150
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
151
    {NULL, NULL,0,0,0,0} //End Marker
152
};
153

    
154
/*
 * Filter-name aliases, stored as consecutive (alias, expansion) pairs.
 * When a filter name equals an alias, the mode parser splices the expansion
 * into the mode string. A NULL in an alias slot ends the table.
 */
static const char *replaceTable[]=
{
    /* alias        expansion */
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",

    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",

    "ac",           "ha:a:128:7,va:a,dr:a",

    NULL /* end marker */
};
163

    
164

    
165
#if ARCH_X86
166
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(
        "prefetchnta (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
172

    
173
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(
        "prefetcht0 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
179

    
180
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(
        "prefetcht1 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
186

    
187
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(
        "prefetcht2 (%0)\n\t"
        : /* no outputs */
        : "r" (p)
    );
}
193
#endif
194

    
195
/* The horizontal functions exist only in C because the MMX
196
 * code is faster with vertical filters and transposing. */
197

    
198
/**
199
 * Check if the given 8x8 Block is mostly "flat"
200
 */
201
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
202
{
203
    int numEq= 0;
204
    int y;
205
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
206
    const int dcThreshold= dcOffset*2 + 1;
207

    
208
    for(y=0; y<BLOCK_SIZE; y++){
209
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
210
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
211
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
212
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
213
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
214
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
215
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
216
        src+= stride;
217
    }
218
    return numEq > c->ppMode.flatnessThreshold;
219
}
220

    
221
/**
222
 * Check if the middle 8x8 Block in the given 8x16 block is flat
223
 */
224
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
225
{
226
    int numEq= 0;
227
    int y;
228
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
229
    const int dcThreshold= dcOffset*2 + 1;
230

    
231
    src+= stride*4; // src points to begin of the 8x8 Block
232
    for(y=0; y<BLOCK_SIZE-1; y++){
233
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
234
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
235
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
236
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
237
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
238
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
239
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
240
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
241
        src+= stride;
242
    }
243
    return numEq > c->ppMode.flatnessThreshold;
244
}
245

    
246
/**
 * Cheap horizontal range check on an 8x8 block.
 *
 * Only one pixel pair is sampled per row, five columns apart (with wrap):
 * columns 0/5, 2/7, 4/1 and 6/3, the pattern repeating for rows 4..7.
 * A pair passes when the two values differ by at most 2*QP.
 * (The disabled alternative compared columns 0 and 7 of every row.)
 *
 * @param src    top-left pixel of the block
 * @param stride distance in bytes between two rows
 * @param QP     quantizer the tolerance is derived from
 * @return 1 if all 8 sampled pairs pass, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t probe[4][2] = {
        { 0, 5 }, { 2, 7 }, { 4, 1 }, { 6, 3 }
    };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row * stride;
        const int lhs = line[probe[row & 3][0]];
        const int rhs = line[probe[row & 3][1]];

        /* single unsigned compare == "|lhs - rhs| > 2*QP" */
        if ((unsigned)(lhs - rhs + 2*QP) > 4*QP)
            return 0;
    }

    return 1;
}
268

    
269
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
270
{
271
#if 1
272
#if 1
273
    int x;
274
    src+= stride*4;
275
    for(x=0; x<BLOCK_SIZE; x+=4){
276
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
277
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
278
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
279
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
280
    }
281
#else
282
    int x;
283
    src+= stride*3;
284
    for(x=0; x<BLOCK_SIZE; x++){
285
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
286
    }
287
#endif
288
    return 1;
289
#else
290
    int x;
291
    src+= stride*4;
292
    for(x=0; x<BLOCK_SIZE; x++){
293
        int min=255;
294
        int max=0;
295
        int y;
296
        for(y=0; y<8; y++){
297
            int v= src[x + y*stride];
298
            if(v>max) max=v;
299
            if(v<min) min=v;
300
        }
301
        if(max-min > 2*QP) return 0;
302
    }
303
    return 1;
304
#endif
305
}
306

    
307
/**
 * Classify a block for horizontal deblocking.
 *
 * @return 2 if isHorizDC_C() says the block is not flat;
 *         1 if it is flat and isHorizMinMaxOk_C() passes;
 *         0 if it is flat but the min/max check fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isHorizDC_C(src, stride, c))
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
318

    
319
/**
 * Classify a block for vertical deblocking.
 *
 * @return 2 if isVertDC_C() says the block is not flat;
 *         1 if it is flat and isVertMinMaxOk_C() passes;
 *         0 if it is flat but the min/max check fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isVertDC_C(src, stride, c))
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
330

    
331
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the 8 rows, only the two pixels next to the edge between
 * columns 3 and 4 are modified, and only when the "middle energy" across
 * that edge is below 8*QP. The correction is limited to the range between
 * 0 and half the step dst[3] - dst[4].
 *
 * @param dst    first pixel of the 8 pixel wide row segment (modified)
 * @param stride distance in bytes between two rows
 * @param c      context; only QP is read
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for (row = 0; row < BLOCK_SIZE; row++, dst += stride) {
        const int middleEnergy = 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        const int absMiddle    = FFABS(middleEnergy);
        int leftEnergy, rightEnergy, limit, corr;

        if (absMiddle >= 8*c->QP)
            continue; /* edge too strong, leave this row alone */

        limit       = (dst[3] - dst[4])/2;
        leftEnergy  = 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy = 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        corr  = absMiddle - FFMIN(FFABS(leftEnergy), FFABS(rightEnergy));
        corr  = FFMAX(corr, 0);
        corr  = (5*corr + 32) >> 6;
        corr *= FFSIGN(-middleEnergy);

        /* keep the correction between 0 and limit, whatever limit's sign */
        if (limit > 0)
            corr = FFMIN(FFMAX(corr, 0), limit);
        else
            corr = FFMAX(FFMIN(corr, 0), limit);

        dst[3] -= corr;
        dst[4] += corr;
    }
}
365

    
366
/**
367
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
368
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
369
 */
370
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
371
{
372
    int y;
373
    for(y=0; y<BLOCK_SIZE; y++){
374
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
375
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
376

    
377
        int sums[10];
378
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
379
        sums[1] = sums[0] - first  + dst[3];
380
        sums[2] = sums[1] - first  + dst[4];
381
        sums[3] = sums[2] - first  + dst[5];
382
        sums[4] = sums[3] - first  + dst[6];
383
        sums[5] = sums[4] - dst[0] + dst[7];
384
        sums[6] = sums[5] - dst[1] + last;
385
        sums[7] = sums[6] - dst[2] + last;
386
        sums[8] = sums[7] - dst[3] + last;
387
        sums[9] = sums[8] - dst[4] + last;
388

    
389
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
390
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
391
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
392
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
393
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
394
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
395
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
396
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
397

    
398
        dst+= stride;
399
    }
400
}
401

    
402
/**
403
 * Experimental Filter 1 (Horizontal)
404
 * will not damage linear gradients
405
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
406
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
407
 * MMX2 version does correct clipping C version does not
408
 * not identical with the vertical one
409
 */
410
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
411
{
412
    int y;
413
    static uint64_t *lut= NULL;
414
    if(lut==NULL)
415
    {
416
        int i;
417
        lut = av_malloc(256*8);
418
        for(i=0; i<256; i++)
419
        {
420
            int v= i < 128 ? 2*i : 2*(i-256);
421
/*
422
//Simulate 112242211 9-Tap filter
423
            uint64_t a= (v/16)  & 0xFF;
424
            uint64_t b= (v/8)   & 0xFF;
425
            uint64_t c= (v/4)   & 0xFF;
426
            uint64_t d= (3*v/8) & 0xFF;
427
*/
428
//Simulate piecewise linear interpolation
429
            uint64_t a= (v/16)   & 0xFF;
430
            uint64_t b= (v*3/16) & 0xFF;
431
            uint64_t c= (v*5/16) & 0xFF;
432
            uint64_t d= (7*v/16) & 0xFF;
433
            uint64_t A= (0x100 - a)&0xFF;
434
            uint64_t B= (0x100 - b)&0xFF;
435
            uint64_t C= (0x100 - c)&0xFF;
436
            uint64_t D= (0x100 - c)&0xFF;
437

    
438
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
439
                       (D<<24) | (C<<16) | (B<<8)  | (A);
440
            //lut[i] = (v<<32) | (v<<24);
441
        }
442
    }
443

    
444
    for(y=0; y<BLOCK_SIZE; y++){
445
        int a= src[1] - src[2];
446
        int b= src[3] - src[4];
447
        int c= src[5] - src[6];
448

    
449
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
450

    
451
        if(d < QP){
452
            int v = d * FFSIGN(-b);
453

    
454
            src[1] +=v/8;
455
            src[2] +=v/4;
456
            src[3] +=3*v/8;
457
            src[4] -=3*v/8;
458
            src[5] -=v/4;
459
            src[6] -=v/8;
460
        }
461
        src+=stride;
462
    }
463
}
464

    
465
/**
466
 * accurate deblock filter
467
 */
468
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
469
    int y;
470
    const int QP= c->QP;
471
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
472
    const int dcThreshold= dcOffset*2 + 1;
473
//START_TIMER
474
    src+= step*4; // src points to begin of the 8x8 Block
475
    for(y=0; y<8; y++){
476
        int numEq= 0;
477

    
478
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
479
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
480
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
481
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
482
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
483
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
484
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
485
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
486
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
487
        if(numEq > c->ppMode.flatnessThreshold){
488
            int min, max, x;
489

    
490
            if(src[0] > src[step]){
491
                max= src[0];
492
                min= src[step];
493
            }else{
494
                max= src[step];
495
                min= src[0];
496
            }
497
            for(x=2; x<8; x+=2){
498
                if(src[x*step] > src[(x+1)*step]){
499
                        if(src[x    *step] > max) max= src[ x   *step];
500
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
501
                }else{
502
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
503
                        if(src[ x   *step] < min) min= src[ x   *step];
504
                }
505
            }
506
            if(max-min < 2*QP){
507
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
508
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
509

    
510
                int sums[10];
511
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
512
                sums[1] = sums[0] - first       + src[3*step];
513
                sums[2] = sums[1] - first       + src[4*step];
514
                sums[3] = sums[2] - first       + src[5*step];
515
                sums[4] = sums[3] - first       + src[6*step];
516
                sums[5] = sums[4] - src[0*step] + src[7*step];
517
                sums[6] = sums[5] - src[1*step] + last;
518
                sums[7] = sums[6] - src[2*step] + last;
519
                sums[8] = sums[7] - src[3*step] + last;
520
                sums[9] = sums[8] - src[4*step] + last;
521

    
522
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
523
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
524
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
525
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
526
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
527
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
528
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
529
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
530
            }
531
        }else{
532
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
533

    
534
            if(FFABS(middleEnergy) < 8*QP){
535
                const int q=(src[3*step] - src[4*step])/2;
536
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
537
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
538

    
539
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
540
                d= FFMAX(d, 0);
541

    
542
                d= (5*d + 32) >> 6;
543
                d*= FFSIGN(-middleEnergy);
544

    
545
                if(q>0){
546
                    d= d<0 ? 0 : d;
547
                    d= d>q ? q : d;
548
                }else{
549
                    d= d>0 ? 0 : d;
550
                    d= d<q ? q : d;
551
                }
552

    
553
                src[3*step]-= d;
554
                src[4*step]+= d;
555
            }
556
        }
557

    
558
        src += stride;
559
    }
560
/*if(step==16){
561
    STOP_TIMER("step16")
562
}else{
563
    STOP_TIMER("stepX")
564
}*/
565
}
566

    
567
/*
 * Select which variants of the template get compiled.
 * Variants are: C, AltiVec, MMX, MMX2 and 3DNow; there is no combined
 * 3DNow+MMX2 variant. With CONFIG_RUNTIME_CPUDETECT the C variant and, on
 * x86, all three x86 variants are built.
 */

/* plain C */
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#   define COMPILE_C
#endif

#if HAVE_ALTIVEC
#   define COMPILE_ALTIVEC
#endif /* HAVE_ALTIVEC */

#if ARCH_X86

#   if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#       define COMPILE_MMX
#   endif

#   if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#       define COMPILE_MMX2
#   endif

#   if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#       define COMPILE_3DNOW
#   endif
#endif /* ARCH_X86 */
591

    
592
/*
 * Instantiate postprocess_template.c once per selected variant.
 * Before each inclusion the HAVE_* macros are forced to describe exactly
 * that variant and RENAME() is set to append the variant's suffix.
 */
#undef  HAVE_MMX
#define HAVE_MMX 0
#undef  HAVE_MMX2
#define HAVE_MMX2 0
#undef  HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef  HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/* plain C variant: *_C */
#ifdef COMPILE_C
#   define RENAME(a) a ## _C
#   include "postprocess_template.c"
#endif

/* AltiVec variant: *_altivec */
#ifdef COMPILE_ALTIVEC
#   undef  RENAME
#   undef  HAVE_ALTIVEC
#   define HAVE_ALTIVEC 1
#   define RENAME(a) a ## _altivec
#   include "postprocess_altivec_template.c"
#   include "postprocess_template.c"
#endif

/* MMX variant: *_MMX */
#ifdef COMPILE_MMX
#   undef  RENAME
#   undef  HAVE_MMX
#   define HAVE_MMX 1
#   define RENAME(a) a ## _MMX
#   include "postprocess_template.c"
#endif

/* MMX2 variant: *_MMX2 */
#ifdef COMPILE_MMX2
#   undef  RENAME
#   undef  HAVE_MMX
#   undef  HAVE_MMX2
#   define HAVE_MMX 1
#   define HAVE_MMX2 1
#   define RENAME(a) a ## _MMX2
#   include "postprocess_template.c"
#endif

/* 3DNow variant: *_3DNow */
#ifdef COMPILE_3DNOW
#   undef  RENAME
#   undef  HAVE_MMX
#   undef  HAVE_MMX2
#   undef  HAVE_AMD3DNOW
#   define HAVE_MMX 1
#   define HAVE_MMX2 0
#   define HAVE_AMD3DNOW 1
#   define RENAME(a) a ## _3DNow
#   include "postprocess_template.c"
#endif

/* From here on the HAVE_* macros hold whatever the last instantiated
 * variant left behind, not the configured values; do not rely on them. */
649

    
650
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
651
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
652
{
653
    PPContext *c= (PPContext *)vc;
654
    PPMode *ppMode= (PPMode *)vm;
655
    c->ppMode= *ppMode; //FIXME
656

    
657
    // Using ifs here as they are faster than function pointers although the
658
    // difference would not be measurable here but it is much better because
659
    // someone might exchange the CPU whithout restarting MPlayer ;)
660
#if CONFIG_RUNTIME_CPUDETECT
661
#if ARCH_X86
662
    // ordered per speed fastest first
663
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
664
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
665
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
666
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
668
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669
    else
670
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
671
#else
672
#if HAVE_ALTIVEC
673
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
674
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
675
    else
676
#endif
677
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678
#endif
679
#else //CONFIG_RUNTIME_CPUDETECT
680
#if   HAVE_MMX2
681
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682
#elif HAVE_AMD3DNOW
683
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
684
#elif HAVE_MMX
685
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
686
#elif HAVE_ALTIVEC
687
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
688
#else
689
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
690
#endif
691
#endif //!CONFIG_RUNTIME_CPUDETECT
692
}
693

    
694
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
695
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
696

    
697
/* -pp Command line Help
 *
 * The names listed here must be spelled exactly as in the filters[] and
 * replaceTable[] tables, because the mode parser matches them with strcmp().
 * The long names of "ha", "va" and "fq" are therefore given as "ahdeblock",
 * "avdeblock" and "forcequant"; the previously listed "hadeblock",
 * "vadeblock" and "forceQuant" match no table entry.
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
742

    
743
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
744
{
745
    char temp[GET_MODE_BUFFER_SIZE];
746
    char *p= temp;
747
    static const char filterDelimiters[] = ",/";
748
    static const char optionDelimiters[] = ":";
749
    struct PPMode *ppMode;
750
    char *filterToken;
751

    
752
    ppMode= av_malloc(sizeof(PPMode));
753

    
754
    ppMode->lumMode= 0;
755
    ppMode->chromMode= 0;
756
    ppMode->maxTmpNoise[0]= 700;
757
    ppMode->maxTmpNoise[1]= 1500;
758
    ppMode->maxTmpNoise[2]= 3000;
759
    ppMode->maxAllowedY= 234;
760
    ppMode->minAllowedY= 16;
761
    ppMode->baseDcDiff= 256/8;
762
    ppMode->flatnessThreshold= 56-16-1;
763
    ppMode->maxClippedThreshold= 0.01;
764
    ppMode->error=0;
765

    
766
    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE);
767

    
768
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
769

    
770
    for(;;){
771
        char *filterName;
772
        int q= 1000000; //PP_QUALITY_MAX;
773
        int chrom=-1;
774
        int luma=-1;
775
        char *option;
776
        char *options[OPTIONS_ARRAY_SIZE];
777
        int i;
778
        int filterNameOk=0;
779
        int numOfUnknownOptions=0;
780
        int enable=1; //does the user want us to enabled or disabled the filter
781

    
782
        filterToken= strtok(p, filterDelimiters);
783
        if(filterToken == NULL) break;
784
        p+= strlen(filterToken) + 1; // p points to next filterToken
785
        filterName= strtok(filterToken, optionDelimiters);
786
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
787

    
788
        if(*filterName == '-'){
789
            enable=0;
790
            filterName++;
791
        }
792

    
793
        for(;;){ //for all options
794
            option= strtok(NULL, optionDelimiters);
795
            if(option == NULL) break;
796

    
797
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
798
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
799
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
800
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
801
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
802
            else{
803
                options[numOfUnknownOptions] = option;
804
                numOfUnknownOptions++;
805
            }
806
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
807
        }
808
        options[numOfUnknownOptions] = NULL;
809

    
810
        /* replace stuff from the replace Table */
811
        for(i=0; replaceTable[2*i]!=NULL; i++){
812
            if(!strcmp(replaceTable[2*i], filterName)){
813
                int newlen= strlen(replaceTable[2*i + 1]);
814
                int plen;
815
                int spaceLeft;
816

    
817
                if(p==NULL) p= temp, *p=0;      //last filter
818
                else p--, *p=',';               //not last filter
819

    
820
                plen= strlen(p);
821
                spaceLeft= p - temp + plen;
822
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
823
                    ppMode->error++;
824
                    break;
825
                }
826
                memmove(p + newlen, p, plen+1);
827
                memcpy(p, replaceTable[2*i + 1], newlen);
828
                filterNameOk=1;
829
            }
830
        }
831

    
832
        for(i=0; filters[i].shortName!=NULL; i++){
833
            if(   !strcmp(filters[i].longName, filterName)
834
               || !strcmp(filters[i].shortName, filterName)){
835
                ppMode->lumMode &= ~filters[i].mask;
836
                ppMode->chromMode &= ~filters[i].mask;
837

    
838
                filterNameOk=1;
839
                if(!enable) break; // user wants to disable it
840

    
841
                if(q >= filters[i].minLumQuality && luma)
842
                    ppMode->lumMode|= filters[i].mask;
843
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
844
                    if(q >= filters[i].minChromQuality)
845
                            ppMode->chromMode|= filters[i].mask;
846

    
847
                if(filters[i].mask == LEVEL_FIX){
848
                    int o;
849
                    ppMode->minAllowedY= 16;
850
                    ppMode->maxAllowedY= 234;
851
                    for(o=0; options[o]!=NULL; o++){
852
                        if(  !strcmp(options[o],"fullyrange")
853
                           ||!strcmp(options[o],"f")){
854
                            ppMode->minAllowedY= 0;
855
                            ppMode->maxAllowedY= 255;
856
                            numOfUnknownOptions--;
857
                        }
858
                    }
859
                }
860
                else if(filters[i].mask == TEMP_NOISE_FILTER)
861
                {
862
                    int o;
863
                    int numOfNoises=0;
864

    
865
                    for(o=0; options[o]!=NULL; o++){
866
                        char *tail;
867
                        ppMode->maxTmpNoise[numOfNoises]=
868
                            strtol(options[o], &tail, 0);
869
                        if(tail!=options[o]){
870
                            numOfNoises++;
871
                            numOfUnknownOptions--;
872
                            if(numOfNoises >= 3) break;
873
                        }
874
                    }
875
                }
876
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
877
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
878
                    int o;
879

    
880
                    for(o=0; options[o]!=NULL && o<2; o++){
881
                        char *tail;
882
                        int val= strtol(options[o], &tail, 0);
883
                        if(tail==options[o]) break;
884

    
885
                        numOfUnknownOptions--;
886
                        if(o==0) ppMode->baseDcDiff= val;
887
                        else ppMode->flatnessThreshold= val;
888
                    }
889
                }
890
                else if(filters[i].mask == FORCE_QUANT){
891
                    int o;
892
                    ppMode->forcedQuant= 15;
893

    
894
                    for(o=0; options[o]!=NULL && o<1; o++){
895
                        char *tail;
896
                        int val= strtol(options[o], &tail, 0);
897
                        if(tail==options[o]) break;
898

    
899
                        numOfUnknownOptions--;
900
                        ppMode->forcedQuant= val;
901
                    }
902
                }
903
            }
904
        }
905
        if(!filterNameOk) ppMode->error++;
906
        ppMode->error += numOfUnknownOptions;
907
    }
908

    
909
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
910
    if(ppMode->error){
911
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
912
        av_free(ppMode);
913
        return NULL;
914
    }
915
    return ppMode;
916
}
917

    
918
/**
 * Free a mode object.
 *
 * The pointer is handed straight to av_free(); nothing else is released.
 *
 * @param mode mode object to free
 */
void pp_free_mode(pp_mode *mode){
    av_free(mode);
}
921

    
922
/**
 * Replace the buffer stored in *p by a newly allocated one.
 *
 * The previous buffer is freed first, so its contents are never carried
 * over. The new buffer comes from av_mallocz() and its result is stored
 * without being checked; callers must cope with *p being NULL afterwards.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment requested alignment; not used by this implementation
 * @param size      size in bytes of the new buffer
 */
static void reallocAlign(void **p, int alignment, int size){
    av_free(*p);
    *p= av_mallocz(size);
}
926

    
927
/**
 * (Re)allocate all scratch buffers of a context for a new geometry.
 *
 * Every buffer is replaced through reallocAlign(), so previous contents are
 * discarded. The new stride and qpStride are recorded in the context before
 * the buffers are sized from them.
 *
 * Allocation failures are not reported (the function returns void); the
 * only failure handled locally is the histogram fill, which is skipped when
 * the histogram buffer could not be allocated.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the pixel buffers
 * @param qpStride line size used to dimension the QP tables
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    const int mbWidth = (width+15)>>4;
    const int mbHeight= (height+15)>>4;
    // 64-bit intermediate: width*height may not fit in an int
    const uint64_t histogramInit= (int64_t)width*height/64*15/256;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    if(c->yHistogram){
        // every bin starts with the same value
        for(i=0; i<256; i++)
            c->yHistogram[i]= histogramInit;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
953

    
954
/**
 * Name callback that always yields the fixed label "postproc".
 *
 * @param ptr ignored
 * @return pointer to the string literal "postproc"
 */
static const char * context_to_name(void * ptr) {
    return "postproc";
}
957

    
958
/* Class descriptor that pp_get_context() stores in each new context. */
static const AVClass av_codec_context_class = {
    "Postproc",         /* class name */
    context_to_name,    /* name callback, always returns "postproc" */
    NULL                /* third member left unset */
};
959

    
960
pp_context *pp_get_context(int width, int height, int cpuCaps){
961
    PPContext *c= av_malloc(sizeof(PPContext));
962
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
963
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
964

    
965
    memset(c, 0, sizeof(PPContext));
966
    c->av_class = &av_codec_context_class;
967
    c->cpuCaps= cpuCaps;
968
    if(cpuCaps&PP_FORMAT){
969
        c->hChromaSubSample= cpuCaps&0x3;
970
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
971
    }else{
972
        c->hChromaSubSample= 1;
973
        c->vChromaSubSample= 1;
974
    }
975

    
976
    reallocBuffers(c, width, height, stride, qpStride);
977

    
978
    c->frameNum=-1;
979

    
980
    return c;
981
}
982

    
983
void pp_free_context(void *vc){
984
    PPContext *c = (PPContext*)vc;
985
    int i;
986

    
987
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
988
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
989

    
990
    av_free(c->tempBlocks);
991
    av_free(c->yHistogram);
992
    av_free(c->tempDst);
993
    av_free(c->tempSrc);
994
    av_free(c->deintTemp);
995
    av_free(c->stdQPTable);
996
    av_free(c->nonBQPTable);
997
    av_free(c->forcedQPTable);
998

    
999
    memset(c, 0, sizeof(PPContext));
1000

    
1001
    av_free(c);
1002
}
1003

    
1004
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1005
                     uint8_t * dst[3], const int dstStride[3],
1006
                     int width, int height,
1007
                     const QP_STORE_T *QP_store,  int QPStride,
1008
                     pp_mode *vm,  void *vc, int pict_type)
1009
{
1010
    int mbWidth = (width+15)>>4;
1011
    int mbHeight= (height+15)>>4;
1012
    PPMode *mode = (PPMode*)vm;
1013
    PPContext *c = (PPContext*)vc;
1014
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1015
    int absQPStride = FFABS(QPStride);
1016

    
1017
    // c->stride and c->QPStride are always positive
1018
    if(c->stride < minStride || c->qpStride < absQPStride)
1019
        reallocBuffers(c, width, height,
1020
                       FFMAX(minStride, c->stride),
1021
                       FFMAX(c->qpStride, absQPStride));
1022

    
1023
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1024
        int i;
1025
        QP_store= c->forcedQPTable;
1026
        absQPStride = QPStride = 0;
1027
        if(mode->lumMode & FORCE_QUANT)
1028
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1029
        else
1030
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1031
    }
1032

    
1033
    if(pict_type & PP_PICT_TYPE_QP2){
1034
        int i;
1035
        const int count= mbHeight * absQPStride;
1036
        for(i=0; i<(count>>2); i++){
1037
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1038
        }
1039
        for(i<<=2; i<count; i++){
1040
            c->stdQPTable[i] = QP_store[i]>>1;
1041
        }
1042
        QP_store= c->stdQPTable;
1043
        QPStride= absQPStride;
1044
    }
1045

    
1046
    if(0){
1047
        int x,y;
1048
        for(y=0; y<mbHeight; y++){
1049
            for(x=0; x<mbWidth; x++){
1050
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1051
            }
1052
            av_log(c, AV_LOG_INFO, "\n");
1053
        }
1054
        av_log(c, AV_LOG_INFO, "\n");
1055
    }
1056

    
1057
    if((pict_type&7)!=3){
1058
        if (QPStride >= 0){
1059
            int i;
1060
            const int count= mbHeight * QPStride;
1061
            for(i=0; i<(count>>2); i++){
1062
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1063
            }
1064
            for(i<<=2; i<count; i++){
1065
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1066
            }
1067
        } else {
1068
            int i,j;
1069
            for(i=0; i<mbHeight; i++) {
1070
                for(j=0; j<absQPStride; j++) {
1071
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1072
                }
1073
            }
1074
        }
1075
    }
1076

    
1077
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1078
           mode->lumMode, mode->chromMode);
1079

    
1080
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1081
                width, height, QP_store, QPStride, 0, mode, c);
1082

    
1083
    width  = (width )>>c->hChromaSubSample;
1084
    height = (height)>>c->vChromaSubSample;
1085

    
1086
    if(mode->chromMode){
1087
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1088
                    width, height, QP_store, QPStride, 1, mode, c);
1089
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1090
                    width, height, QP_store, QPStride, 2, mode, c);
1091
    }
1092
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1093
        linecpy(dst[1], src[1], height, srcStride[1]);
1094
        linecpy(dst[2], src[2], height, srcStride[2]);
1095
    }else{
1096
        int y;
1097
        for(y=0; y<height; y++){
1098
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1099
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1100
        }
1101
    }
1102
}
1103