Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ a81ff195

History | View | Annotate | Download (37.4 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of Libav.
7
 *
8
 * Libav is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * Libav is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with Libav; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use git log
75

    
76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
//#undef HAVE_MMX2
83
//#define HAVE_AMD3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
88
#include "postprocess_internal.h"
89

    
90
unsigned postproc_version(void)
91
{
92
    return LIBPOSTPROC_VERSION_INT;
93
}
94

    
95
const char *postproc_configuration(void)
96
{
97
    return LIBAV_CONFIGURATION;
98
}
99

    
100
const char *postproc_license(void)
101
{
102
#define LICENSE_PREFIX "libpostproc license: "
103
    return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
104
}
105

    
106
#if HAVE_ALTIVEC_H
107
#include <altivec.h>
108
#endif
109

    
110
#define GET_MODE_BUFFER_SIZE 500
111
#define OPTIONS_ARRAY_SIZE 10
112
#define BLOCK_SIZE 8
113
#define TEMP_STRIDE 8
114
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
115

    
116
#if ARCH_X86
117
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
118
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
119
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
120
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
121
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
122
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
123
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
124
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
125
#endif
126

    
127
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
128

    
129

    
130
static struct PPFilter filters[]=
131
{
132
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
133
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
134
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
135
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
136
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
137
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
138
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
139
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
140
    {"dr", "dering",                1, 5, 6, DERING},
141
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
142
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
143
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
144
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
145
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
146
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
147
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
148
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
149
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
150
    {NULL, NULL,0,0,0,0} //End Marker
151
};
152

    
153
/*
 * Alias table: pairs of (alias name, filter chain it expands to),
 * terminated by a single NULL. Both the strings and the pointer array
 * are read-only.
 */
static const char * const replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
162

    
163

    
164
#if ARCH_X86
165
/** Execute the x86 "prefetchnta" instruction on the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : : "r" (p));
}
171

    
172
/** Execute the x86 "prefetcht0" instruction on the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : : "r" (p));
}
178

    
179
/** Execute the x86 "prefetcht1" instruction on the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : : "r" (p));
}
185

    
186
/** Execute the x86 "prefetcht2" instruction on the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : : "r" (p));
}
192
#endif
193

    
194
/* The horizontal functions exist only in C because the MMX
195
 * code is faster with vertical filters and transposing. */
196

    
197
/**
198
 * Check if the given 8x8 Block is mostly "flat"
199
 */
200
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
201
{
202
    int numEq= 0;
203
    int y;
204
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
205
    const int dcThreshold= dcOffset*2 + 1;
206

    
207
    for(y=0; y<BLOCK_SIZE; y++){
208
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
209
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
210
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
211
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
212
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
213
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
214
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
215
        src+= stride;
216
    }
217
    return numEq > c->ppMode.flatnessThreshold;
218
}
219

    
220
/**
221
 * Check if the middle 8x8 Block in the given 8x16 block is flat
222
 */
223
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
224
{
225
    int numEq= 0;
226
    int y;
227
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
228
    const int dcThreshold= dcOffset*2 + 1;
229

    
230
    src+= stride*4; // src points to begin of the 8x8 Block
231
    for(y=0; y<BLOCK_SIZE-1; y++){
232
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
233
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
234
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
235
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
236
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
237
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
238
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
239
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
240
        src+= stride;
241
    }
242
    return numEq > c->ppMode.flatnessThreshold;
243
}
244

    
245
/**
 * Cheap horizontal range check over 8 rows.
 *
 * One pixel pair is sampled per row, following a pattern that repeats
 * every 4 rows: columns (0,5), (2,7), (4,1), (6,3). The check fails as
 * soon as a sampled pair differs by more than 2*QP in either direction.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer used to scale the allowed difference
 * @return 1 if every sampled pair is within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t pairs[4][2]= { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for(row=0; row<8; row++, src+= stride){
        const uint8_t *p= pairs[row & 3];

        /* unsigned compare folds the two-sided test into one */
        if((unsigned)(src[p[0]] - src[p[1]] + 2*QP) > 4*QP)
            return 0;
    }
    return 1;
}
267

    
268
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269
{
270
#if 1
271
#if 1
272
    int x;
273
    src+= stride*4;
274
    for(x=0; x<BLOCK_SIZE; x+=4){
275
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
276
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279
    }
280
#else
281
    int x;
282
    src+= stride*3;
283
    for(x=0; x<BLOCK_SIZE; x++){
284
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
285
    }
286
#endif
287
    return 1;
288
#else
289
    int x;
290
    src+= stride*4;
291
    for(x=0; x<BLOCK_SIZE; x++){
292
        int min=255;
293
        int max=0;
294
        int y;
295
        for(y=0; y<8; y++){
296
            int v= src[x + y*stride];
297
            if(v>max) max=v;
298
            if(v<min) min=v;
299
        }
300
        if(max-min > 2*QP) return 0;
301
    }
302
    return 1;
303
#endif
304
}
305

    
306
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() reports the block as not flat,
 *         1 if it is flat and isHorizMinMaxOk_C() passes,
 *         0 if it is flat but isHorizMinMaxOk_C() fails
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isHorizDC_C(src, stride, c) )
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
317

    
318
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() reports the block as not flat,
 *         1 if it is flat and isVertMinMaxOk_C() passes,
 *         0 if it is flat but isVertMinMaxOk_C() fails
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if( !isVertDC_C(src, stride, c) )
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
329

    
330
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of the BLOCK_SIZE rows, an "energy" is computed across the
 * edge between dst[3] and dst[4]. If it is below 8*QP, a correction
 * derived from the middle, left and right energies is moved from one side
 * of the edge to the other. The correction never exceeds half the step
 * across the edge and never has the opposite sign.
 *
 * @param dst    first pixel of the first row; dst[0..7] is read, dst[3]
 *               and dst[4] are modified
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int row;

    for(row=0; row<BLOCK_SIZE; row++, dst+= stride){
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int halfStep, leftEnergy, rightEnergy, delta;

        if(FFABS(middleEnergy) >= 8*c->QP)
            continue;

        halfStep=    (dst[3] - dst[4])/2;
        leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        delta= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
        delta= FFMAX(delta, 0);
        delta= (5*delta + 32) >> 6;
        delta*= FFSIGN(-middleEnergy);

        /* clamp delta into the range between 0 and halfStep */
        if(halfStep > 0){
            if(delta < 0)        delta= 0;
            if(delta > halfStep) delta= halfStep;
        }else{
            if(delta > 0)        delta= 0;
            if(delta < halfStep) delta= halfStep;
        }

        dst[3]-= delta;
        dst[4]+= delta;
    }
}
364

    
365
/**
366
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
367
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
368
 */
369
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
370
{
371
    int y;
372
    for(y=0; y<BLOCK_SIZE; y++){
373
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
374
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
375

    
376
        int sums[10];
377
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
378
        sums[1] = sums[0] - first  + dst[3];
379
        sums[2] = sums[1] - first  + dst[4];
380
        sums[3] = sums[2] - first  + dst[5];
381
        sums[4] = sums[3] - first  + dst[6];
382
        sums[5] = sums[4] - dst[0] + dst[7];
383
        sums[6] = sums[5] - dst[1] + last;
384
        sums[7] = sums[6] - dst[2] + last;
385
        sums[8] = sums[7] - dst[3] + last;
386
        sums[9] = sums[8] - dst[4] + last;
387

    
388
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
389
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
390
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
391
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
392
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
393
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
394
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
395
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
396

    
397
        dst+= stride;
398
    }
399
}
400

    
401
/**
402
 * Experimental Filter 1 (Horizontal)
403
 * will not damage linear gradients
404
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
405
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
406
 * MMX2 version does correct clipping C version does not
407
 * not identical with the vertical one
408
 */
409
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
410
{
411
    int y;
412
    static uint64_t *lut= NULL;
413
    if(lut==NULL)
414
    {
415
        int i;
416
        lut = av_malloc(256*8);
417
        for(i=0; i<256; i++)
418
        {
419
            int v= i < 128 ? 2*i : 2*(i-256);
420
/*
421
//Simulate 112242211 9-Tap filter
422
            uint64_t a= (v/16)  & 0xFF;
423
            uint64_t b= (v/8)   & 0xFF;
424
            uint64_t c= (v/4)   & 0xFF;
425
            uint64_t d= (3*v/8) & 0xFF;
426
*/
427
//Simulate piecewise linear interpolation
428
            uint64_t a= (v/16)   & 0xFF;
429
            uint64_t b= (v*3/16) & 0xFF;
430
            uint64_t c= (v*5/16) & 0xFF;
431
            uint64_t d= (7*v/16) & 0xFF;
432
            uint64_t A= (0x100 - a)&0xFF;
433
            uint64_t B= (0x100 - b)&0xFF;
434
            uint64_t C= (0x100 - c)&0xFF;
435
            uint64_t D= (0x100 - c)&0xFF;
436

    
437
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
438
                       (D<<24) | (C<<16) | (B<<8)  | (A);
439
            //lut[i] = (v<<32) | (v<<24);
440
        }
441
    }
442

    
443
    for(y=0; y<BLOCK_SIZE; y++){
444
        int a= src[1] - src[2];
445
        int b= src[3] - src[4];
446
        int c= src[5] - src[6];
447

    
448
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449

    
450
        if(d < QP){
451
            int v = d * FFSIGN(-b);
452

    
453
            src[1] +=v/8;
454
            src[2] +=v/4;
455
            src[3] +=3*v/8;
456
            src[4] -=3*v/8;
457
            src[5] -=v/4;
458
            src[6] -=v/8;
459
        }
460
        src+=stride;
461
    }
462
}
463

    
464
/**
465
 * accurate deblock filter
466
 */
467
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
468
    int y;
469
    const int QP= c->QP;
470
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
471
    const int dcThreshold= dcOffset*2 + 1;
472
//START_TIMER
473
    src+= step*4; // src points to begin of the 8x8 Block
474
    for(y=0; y<8; y++){
475
        int numEq= 0;
476

    
477
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
478
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
479
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
480
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
481
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
482
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
483
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
484
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
485
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
486
        if(numEq > c->ppMode.flatnessThreshold){
487
            int min, max, x;
488

    
489
            if(src[0] > src[step]){
490
                max= src[0];
491
                min= src[step];
492
            }else{
493
                max= src[step];
494
                min= src[0];
495
            }
496
            for(x=2; x<8; x+=2){
497
                if(src[x*step] > src[(x+1)*step]){
498
                        if(src[x    *step] > max) max= src[ x   *step];
499
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
500
                }else{
501
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
502
                        if(src[ x   *step] < min) min= src[ x   *step];
503
                }
504
            }
505
            if(max-min < 2*QP){
506
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
507
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
508

    
509
                int sums[10];
510
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
511
                sums[1] = sums[0] - first       + src[3*step];
512
                sums[2] = sums[1] - first       + src[4*step];
513
                sums[3] = sums[2] - first       + src[5*step];
514
                sums[4] = sums[3] - first       + src[6*step];
515
                sums[5] = sums[4] - src[0*step] + src[7*step];
516
                sums[6] = sums[5] - src[1*step] + last;
517
                sums[7] = sums[6] - src[2*step] + last;
518
                sums[8] = sums[7] - src[3*step] + last;
519
                sums[9] = sums[8] - src[4*step] + last;
520

    
521
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
522
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
523
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
524
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
525
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
526
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
527
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
528
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
529
            }
530
        }else{
531
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
532

    
533
            if(FFABS(middleEnergy) < 8*QP){
534
                const int q=(src[3*step] - src[4*step])/2;
535
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
537

    
538
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
539
                d= FFMAX(d, 0);
540

    
541
                d= (5*d + 32) >> 6;
542
                d*= FFSIGN(-middleEnergy);
543

    
544
                if(q>0){
545
                    d= d<0 ? 0 : d;
546
                    d= d>q ? q : d;
547
                }else{
548
                    d= d>0 ? 0 : d;
549
                    d= d<q ? q : d;
550
                }
551

    
552
                src[3*step]-= d;
553
                src[4*step]+= d;
554
            }
555
        }
556

    
557
        src += stride;
558
    }
559
/*if(step==16){
560
    STOP_TIMER("step16")
561
}else{
562
    STOP_TIMER("stepX")
563
}*/
564
}
565

    
566
//Note: there are C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
567
//Plain C versions
568
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
569
#define COMPILE_C
570
#endif
571

    
572
#if HAVE_ALTIVEC
573
#define COMPILE_ALTIVEC
574
#endif //HAVE_ALTIVEC
575

    
576
#if ARCH_X86
577

    
578
#if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
579
#define COMPILE_MMX
580
#endif
581

    
582
#if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
583
#define COMPILE_MMX2
584
#endif
585

    
586
#if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
587
#define COMPILE_3DNOW
588
#endif
589
#endif /* ARCH_X86 */
590

    
591
#undef HAVE_MMX
592
#define HAVE_MMX 0
593
#undef HAVE_MMX2
594
#define HAVE_MMX2 0
595
#undef HAVE_AMD3DNOW
596
#define HAVE_AMD3DNOW 0
597
#undef HAVE_ALTIVEC
598
#define HAVE_ALTIVEC 0
599

    
600
#ifdef COMPILE_C
601
#define RENAME(a) a ## _C
602
#include "postprocess_template.c"
603
#endif
604

    
605
#ifdef COMPILE_ALTIVEC
606
#undef RENAME
607
#undef HAVE_ALTIVEC
608
#define HAVE_ALTIVEC 1
609
#define RENAME(a) a ## _altivec
610
#include "postprocess_altivec_template.c"
611
#include "postprocess_template.c"
612
#endif
613

    
614
//MMX versions
615
#ifdef COMPILE_MMX
616
#undef RENAME
617
#undef HAVE_MMX
618
#define HAVE_MMX 1
619
#define RENAME(a) a ## _MMX
620
#include "postprocess_template.c"
621
#endif
622

    
623
//MMX2 versions
624
#ifdef COMPILE_MMX2
625
#undef RENAME
626
#undef HAVE_MMX
627
#undef HAVE_MMX2
628
#define HAVE_MMX 1
629
#define HAVE_MMX2 1
630
#define RENAME(a) a ## _MMX2
631
#include "postprocess_template.c"
632
#endif
633

    
634
//3DNOW versions
635
#ifdef COMPILE_3DNOW
636
#undef RENAME
637
#undef HAVE_MMX
638
#undef HAVE_MMX2
639
#undef HAVE_AMD3DNOW
640
#define HAVE_MMX 1
641
#define HAVE_MMX2 0
642
#define HAVE_AMD3DNOW 1
643
#define RENAME(a) a ## _3DNow
644
#include "postprocess_template.c"
645
#endif
646

    
647
// Note: the HAVE_xyz macros were redefined for the template instantiations above, so past this line do not rely on them as build-configuration flags.
648

    
649
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
651
{
652
    PPContext *c= (PPContext *)vc;
653
    PPMode *ppMode= (PPMode *)vm;
654
    c->ppMode= *ppMode; //FIXME
655

    
656
    // Using ifs here as they are faster than function pointers although the
657
    // difference would not be measurable here but it is much better because
658
    // someone might exchange the CPU whithout restarting MPlayer ;)
659
#if CONFIG_RUNTIME_CPUDETECT
660
#if ARCH_X86
661
    // ordered per speed fastest first
662
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668
    else
669
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670
#else
671
#if HAVE_ALTIVEC
672
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
673
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674
    else
675
#endif
676
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677
#endif
678
#else //CONFIG_RUNTIME_CPUDETECT
679
#if   HAVE_MMX2
680
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
681
#elif HAVE_AMD3DNOW
682
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683
#elif HAVE_MMX
684
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
#elif HAVE_ALTIVEC
686
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
#else
688
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
#endif
690
#endif //!CONFIG_RUNTIME_CPUDETECT
691
}
692

    
693
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
694
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
695

    
696
/* -pp Command line Help
 *
 * The long names listed here must match the names in the filters table
 * exactly, because filter names are compared with case-sensitive strcmp():
 * the accurate deblockers are "ahdeblock" / "avdeblock" and the quantizer
 * override is "forcequant".
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
741

    
742
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
743
{
744
    char temp[GET_MODE_BUFFER_SIZE];
745
    char *p= temp;
746
    static const char filterDelimiters[] = ",/";
747
    static const char optionDelimiters[] = ":";
748
    struct PPMode *ppMode;
749
    char *filterToken;
750

    
751
    ppMode= av_malloc(sizeof(PPMode));
752

    
753
    ppMode->lumMode= 0;
754
    ppMode->chromMode= 0;
755
    ppMode->maxTmpNoise[0]= 700;
756
    ppMode->maxTmpNoise[1]= 1500;
757
    ppMode->maxTmpNoise[2]= 3000;
758
    ppMode->maxAllowedY= 234;
759
    ppMode->minAllowedY= 16;
760
    ppMode->baseDcDiff= 256/8;
761
    ppMode->flatnessThreshold= 56-16-1;
762
    ppMode->maxClippedThreshold= 0.01;
763
    ppMode->error=0;
764

    
765
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
766

    
767
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
768

    
769
    for(;;){
770
        char *filterName;
771
        int q= 1000000; //PP_QUALITY_MAX;
772
        int chrom=-1;
773
        int luma=-1;
774
        char *option;
775
        char *options[OPTIONS_ARRAY_SIZE];
776
        int i;
777
        int filterNameOk=0;
778
        int numOfUnknownOptions=0;
779
        int enable=1; //does the user want us to enabled or disabled the filter
780

    
781
        filterToken= strtok(p, filterDelimiters);
782
        if(filterToken == NULL) break;
783
        p+= strlen(filterToken) + 1; // p points to next filterToken
784
        filterName= strtok(filterToken, optionDelimiters);
785
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
786

    
787
        if(*filterName == '-'){
788
            enable=0;
789
            filterName++;
790
        }
791

    
792
        for(;;){ //for all options
793
            option= strtok(NULL, optionDelimiters);
794
            if(option == NULL) break;
795

    
796
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
797
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
798
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
799
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
800
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
801
            else{
802
                options[numOfUnknownOptions] = option;
803
                numOfUnknownOptions++;
804
            }
805
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
806
        }
807
        options[numOfUnknownOptions] = NULL;
808

    
809
        /* replace stuff from the replace Table */
810
        for(i=0; replaceTable[2*i]!=NULL; i++){
811
            if(!strcmp(replaceTable[2*i], filterName)){
812
                int newlen= strlen(replaceTable[2*i + 1]);
813
                int plen;
814
                int spaceLeft;
815

    
816
                if(p==NULL) p= temp, *p=0;      //last filter
817
                else p--, *p=',';               //not last filter
818

    
819
                plen= strlen(p);
820
                spaceLeft= p - temp + plen;
821
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
822
                    ppMode->error++;
823
                    break;
824
                }
825
                memmove(p + newlen, p, plen+1);
826
                memcpy(p, replaceTable[2*i + 1], newlen);
827
                filterNameOk=1;
828
            }
829
        }
830

    
831
        for(i=0; filters[i].shortName!=NULL; i++){
832
            if(   !strcmp(filters[i].longName, filterName)
833
               || !strcmp(filters[i].shortName, filterName)){
834
                ppMode->lumMode &= ~filters[i].mask;
835
                ppMode->chromMode &= ~filters[i].mask;
836

    
837
                filterNameOk=1;
838
                if(!enable) break; // user wants to disable it
839

    
840
                if(q >= filters[i].minLumQuality && luma)
841
                    ppMode->lumMode|= filters[i].mask;
842
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
843
                    if(q >= filters[i].minChromQuality)
844
                            ppMode->chromMode|= filters[i].mask;
845

    
846
                if(filters[i].mask == LEVEL_FIX){
847
                    int o;
848
                    ppMode->minAllowedY= 16;
849
                    ppMode->maxAllowedY= 234;
850
                    for(o=0; options[o]!=NULL; o++){
851
                        if(  !strcmp(options[o],"fullyrange")
852
                           ||!strcmp(options[o],"f")){
853
                            ppMode->minAllowedY= 0;
854
                            ppMode->maxAllowedY= 255;
855
                            numOfUnknownOptions--;
856
                        }
857
                    }
858
                }
859
                else if(filters[i].mask == TEMP_NOISE_FILTER)
860
                {
861
                    int o;
862
                    int numOfNoises=0;
863

    
864
                    for(o=0; options[o]!=NULL; o++){
865
                        char *tail;
866
                        ppMode->maxTmpNoise[numOfNoises]=
867
                            strtol(options[o], &tail, 0);
868
                        if(tail!=options[o]){
869
                            numOfNoises++;
870
                            numOfUnknownOptions--;
871
                            if(numOfNoises >= 3) break;
872
                        }
873
                    }
874
                }
875
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
876
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
877
                    int o;
878

    
879
                    for(o=0; options[o]!=NULL && o<2; o++){
880
                        char *tail;
881
                        int val= strtol(options[o], &tail, 0);
882
                        if(tail==options[o]) break;
883

    
884
                        numOfUnknownOptions--;
885
                        if(o==0) ppMode->baseDcDiff= val;
886
                        else ppMode->flatnessThreshold= val;
887
                    }
888
                }
889
                else if(filters[i].mask == FORCE_QUANT){
890
                    int o;
891
                    ppMode->forcedQuant= 15;
892

    
893
                    for(o=0; options[o]!=NULL && o<1; o++){
894
                        char *tail;
895
                        int val= strtol(options[o], &tail, 0);
896
                        if(tail==options[o]) break;
897

    
898
                        numOfUnknownOptions--;
899
                        ppMode->forcedQuant= val;
900
                    }
901
                }
902
            }
903
        }
904
        if(!filterNameOk) ppMode->error++;
905
        ppMode->error += numOfUnknownOptions;
906
    }
907

    
908
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
909
    if(ppMode->error){
910
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
911
        av_free(ppMode);
912
        return NULL;
913
    }
914
    return ppMode;
915
}
916

    
917
/**
 * Releases a mode object.
 *
 * The pointer is passed unchanged to av_free().
 *
 * @param mode mode object to release
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
920

    
921
/**
 * Replaces the buffer stored in *p with a newly allocated one.
 *
 * The old buffer is released first, then a new one of the requested size is
 * obtained from av_mallocz() and stored back through p.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this implementation
 * @param size      number of bytes to request
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *fresh;

    av_free(*p);
    fresh = av_mallocz(size);
    *p = fresh;
}
925

    
926
/**
 * (Re)allocates all scratch buffers of a context for the given geometry and
 * records the strides the buffers were sized for.
 *
 * Every buffer is replaced through reallocAlign(), so previous contents are
 * discarded. The luma histogram is seeded with a uniform value derived from
 * the picture area.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary line buffers
 * @param qpStride line size used to dimension the QP tables
 *
 * NOTE(review): this function has no way to report allocation failure; only
 * the buffer that is written here (yHistogram) is checked before use.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    // Do not write through a failed allocation.
    if(c->yHistogram){
        // Widen before multiplying so that large pictures cannot overflow int.
        const int64_t initial= (int64_t)width*height/64*15/256;
        for(i=0; i<256; i++)
            c->yHistogram[i]= initial;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
952

    
953
/**
 * Returns the fixed name "postproc".
 *
 * @param ptr ignored
 * @return pointer to a constant, NUL-terminated string
 */
static const char * context_to_name(void * ptr)
{
    static const char label[] = "postproc";

    return label;
}
956

    
957
/* Class descriptor whose address pp_get_context() stores in the av_class
 * member of each context it creates. Initialized positionally; the field
 * meanings are presumably class name, name callback and option table --
 * confirm against the AVClass declaration. */
static const AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
958

    
959
pp_context *pp_get_context(int width, int height, int cpuCaps){
960
    PPContext *c= av_malloc(sizeof(PPContext));
961
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
962
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
963

    
964
    memset(c, 0, sizeof(PPContext));
965
    c->av_class = &av_codec_context_class;
966
    c->cpuCaps= cpuCaps;
967
    if(cpuCaps&PP_FORMAT){
968
        c->hChromaSubSample= cpuCaps&0x3;
969
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
970
    }else{
971
        c->hChromaSubSample= 1;
972
        c->vChromaSubSample= 1;
973
    }
974

    
975
    reallocBuffers(c, width, height, stride, qpStride);
976

    
977
    c->frameNum=-1;
978

    
979
    return c;
980
}
981

    
982
void pp_free_context(void *vc){
983
    PPContext *c = (PPContext*)vc;
984
    int i;
985

    
986
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
987
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
988

    
989
    av_free(c->tempBlocks);
990
    av_free(c->yHistogram);
991
    av_free(c->tempDst);
992
    av_free(c->tempSrc);
993
    av_free(c->deintTemp);
994
    av_free(c->stdQPTable);
995
    av_free(c->nonBQPTable);
996
    av_free(c->forcedQPTable);
997

    
998
    memset(c, 0, sizeof(PPContext));
999

    
1000
    av_free(c);
1001
}
1002

    
1003
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1004
                     uint8_t * dst[3], const int dstStride[3],
1005
                     int width, int height,
1006
                     const QP_STORE_T *QP_store,  int QPStride,
1007
                     pp_mode *vm,  void *vc, int pict_type)
1008
{
1009
    int mbWidth = (width+15)>>4;
1010
    int mbHeight= (height+15)>>4;
1011
    PPMode *mode = (PPMode*)vm;
1012
    PPContext *c = (PPContext*)vc;
1013
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1014
    int absQPStride = FFABS(QPStride);
1015

    
1016
    // c->stride and c->QPStride are always positive
1017
    if(c->stride < minStride || c->qpStride < absQPStride)
1018
        reallocBuffers(c, width, height,
1019
                       FFMAX(minStride, c->stride),
1020
                       FFMAX(c->qpStride, absQPStride));
1021

    
1022
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1023
        int i;
1024
        QP_store= c->forcedQPTable;
1025
        absQPStride = QPStride = 0;
1026
        if(mode->lumMode & FORCE_QUANT)
1027
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1028
        else
1029
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1030
    }
1031

    
1032
    if(pict_type & PP_PICT_TYPE_QP2){
1033
        int i;
1034
        const int count= mbHeight * absQPStride;
1035
        for(i=0; i<(count>>2); i++){
1036
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1037
        }
1038
        for(i<<=2; i<count; i++){
1039
            c->stdQPTable[i] = QP_store[i]>>1;
1040
        }
1041
        QP_store= c->stdQPTable;
1042
        QPStride= absQPStride;
1043
    }
1044

    
1045
    if(0){
1046
        int x,y;
1047
        for(y=0; y<mbHeight; y++){
1048
            for(x=0; x<mbWidth; x++){
1049
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1050
            }
1051
            av_log(c, AV_LOG_INFO, "\n");
1052
        }
1053
        av_log(c, AV_LOG_INFO, "\n");
1054
    }
1055

    
1056
    if((pict_type&7)!=3){
1057
        if (QPStride >= 0){
1058
            int i;
1059
            const int count= mbHeight * QPStride;
1060
            for(i=0; i<(count>>2); i++){
1061
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1062
            }
1063
            for(i<<=2; i<count; i++){
1064
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1065
            }
1066
        } else {
1067
            int i,j;
1068
            for(i=0; i<mbHeight; i++) {
1069
                for(j=0; j<absQPStride; j++) {
1070
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1071
                }
1072
            }
1073
        }
1074
    }
1075

    
1076
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1077
           mode->lumMode, mode->chromMode);
1078

    
1079
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1080
                width, height, QP_store, QPStride, 0, mode, c);
1081

    
1082
    width  = (width )>>c->hChromaSubSample;
1083
    height = (height)>>c->vChromaSubSample;
1084

    
1085
    if(mode->chromMode){
1086
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1087
                    width, height, QP_store, QPStride, 1, mode, c);
1088
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1089
                    width, height, QP_store, QPStride, 2, mode, c);
1090
    }
1091
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1092
        linecpy(dst[1], src[1], height, srcStride[1]);
1093
        linecpy(dst[2], src[2], height, srcStride[2]);
1094
    }else{
1095
        int y;
1096
        for(y=0; y<height; y++){
1097
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1098
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1099
        }
1100
    }
1101
}
1102