Statistics
| Branch: | Revision:

ffmpeg / libpostproc / postprocess.c @ faa6f1c3

History | View | Annotate | Download (37.5 KB)

1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
/**
24
 * @file
25
 * postprocessing.
26
 */
27

    
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49

50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58

    
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73

    
74
//Changelog: use git log
75

    
76
#include "config.h"
77
#include "libavutil/avutil.h"
78
#include <inttypes.h>
79
#include <stdio.h>
80
#include <stdlib.h>
81
#include <string.h>
82
//#undef HAVE_MMX2
83
//#define HAVE_AMD3DNOW
84
//#undef HAVE_MMX
85
//#undef ARCH_X86
86
//#define DEBUG_BRIGHTNESS
87
#include "postprocess.h"
88
#include "postprocess_internal.h"
89

    
90
unsigned postproc_version(void)
91
{
92
    return LIBPOSTPROC_VERSION_INT;
93
}
94

    
95
const char *postproc_configuration(void)
96
{
97
    return LIBAV_CONFIGURATION;
98
}
99

    
100
const char *postproc_license(void)
101
{
102
#define LICENSE_PREFIX "libpostproc license: "
103
    return LICENSE_PREFIX LIBAV_LICENSE + sizeof(LICENSE_PREFIX) - 1;
104
}
105

    
106
#if HAVE_ALTIVEC_H
107
#include <altivec.h>
108
#endif
109

    
110
/* size in bytes of the scratch copy of the mode string made by the parser */
#define GET_MODE_BUFFER_SIZE 500
/* capacity of the per-filter array of unrecognized options (incl. NULL end marker) */
#define OPTIONS_ARRAY_SIZE   10
/* edge length of the blocks processed by the filters in this file */
#define BLOCK_SIZE           8
/* NOTE(review): not referenced in the visible part of this file — presumably used by the templates */
#define TEMP_STRIDE          8
114
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
115

    
116
#if ARCH_X86
117
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
118
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
119
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
120
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
121
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
122
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
123
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
124
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
125
#endif
126

    
127
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
128

    
129

    
130
static struct PPFilter filters[]=
131
{
132
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
133
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
134
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
135
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
136
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
137
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
138
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
139
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
140
    {"dr", "dering",                1, 5, 6, DERING},
141
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
142
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
143
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
144
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
145
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
146
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
147
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
148
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
149
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
150
    {NULL, NULL,0,0,0,0} //End Marker
151
};
152

    
153
/*
 * Alias table used by the mode-string parser: entries come in pairs,
 * an alias at the even index followed by the filter list it expands to.
 * A single NULL at an even index terminates the table.
 */
static const char *replaceTable[] = {
    /* alias        expansion */
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL /* end marker */
};
162

    
163

    
164
#if ARCH_X86
165
/** Issue an x86 "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile("prefetchnta (%0)\n\t" : /* no outputs */ : "r" (p));
}
171

    
172
/** Issue an x86 "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile("prefetcht0 (%0)\n\t" : /* no outputs */ : "r" (p));
}
178

    
179
/** Issue an x86 "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile("prefetcht1 (%0)\n\t" : /* no outputs */ : "r" (p));
}
185

    
186
/** Issue an x86 "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile("prefetcht2 (%0)\n\t" : /* no outputs */ : "r" (p));
}
192
#endif
193

    
194
/* The horizontal functions exist only in C because the MMX
195
 * code is faster with vertical filters and transposing. */
196

    
197
/**
198
 * Check if the given 8x8 Block is mostly "flat"
199
 */
200
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
201
{
202
    int numEq= 0;
203
    int y;
204
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
205
    const int dcThreshold= dcOffset*2 + 1;
206

    
207
    for(y=0; y<BLOCK_SIZE; y++){
208
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
209
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
210
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
211
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
212
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
213
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
214
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
215
        src+= stride;
216
    }
217
    return numEq > c->ppMode.flatnessThreshold;
218
}
219

    
220
/**
221
 * Check if the middle 8x8 Block in the given 8x16 block is flat
222
 */
223
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
224
{
225
    int numEq= 0;
226
    int y;
227
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
228
    const int dcThreshold= dcOffset*2 + 1;
229

    
230
    src+= stride*4; // src points to begin of the 8x8 Block
231
    for(y=0; y<BLOCK_SIZE-1; y++){
232
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
233
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
234
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
235
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
236
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
237
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
238
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
239
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
240
        src+= stride;
241
    }
242
    return numEq > c->ppMode.flatnessThreshold;
243
}
244

    
245
/**
 * Sampled range check on 8 rows of a block.
 *
 * Instead of computing a real minimum/maximum, one pair of pixels is
 * compared per row; the pair of columns used cycles through
 * (0,5), (2,7), (4,1), (6,3) and repeats after four rows.
 *
 * @param src    first pixel of the first row
 * @param stride distance in bytes between two rows
 * @param QP     quantizer; a pair passes if its difference is within +-2*QP
 * @return 1 if every sampled pair passes, 0 as soon as one fails
 */
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
{
    static const uint8_t columns[4][2] = { {0, 5}, {2, 7}, {4, 1}, {6, 3} };
    int row;

    for (row = 0; row < 8; row++) {
        const uint8_t *line = src + row * stride;
        const int lhs = line[columns[row & 3][0]];
        const int rhs = line[columns[row & 3][1]];

        /* unsigned compare rejects both diff > 2*QP and diff < -2*QP */
        if ((unsigned)(lhs - rhs + 2 * QP) > 4 * QP)
            return 0;
    }
    return 1;
}
267

    
268
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
269
{
270
#if 1
271
#if 1
272
    int x;
273
    src+= stride*4;
274
    for(x=0; x<BLOCK_SIZE; x+=4){
275
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
276
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
277
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
278
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
279
    }
280
#else
281
    int x;
282
    src+= stride*3;
283
    for(x=0; x<BLOCK_SIZE; x++){
284
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
285
    }
286
#endif
287
    return 1;
288
#else
289
    int x;
290
    src+= stride*4;
291
    for(x=0; x<BLOCK_SIZE; x++){
292
        int min=255;
293
        int max=0;
294
        int y;
295
        for(y=0; y<8; y++){
296
            int v= src[x + y*stride];
297
            if(v>max) max=v;
298
            if(v<min) min=v;
299
        }
300
        if(max-min > 2*QP) return 0;
301
    }
302
    return 1;
303
#endif
304
}
305

    
306
/**
 * Classify a block for horizontal filtering.
 *
 * @return 2 if isHorizDC_C() does not consider the block flat,
 *         1 if it is flat and isHorizMinMaxOk_C() accepts it,
 *         0 if it is flat but isHorizMinMaxOk_C() rejects it
 */
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isHorizDC_C(src, stride, c))
        return 2;

    return isHorizMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
317

    
318
/**
 * Classify a block for vertical filtering.
 *
 * @return 2 if isVertDC_C() does not consider the block flat,
 *         1 if it is flat and isVertMinMaxOk_C() accepts it,
 *         0 if it is flat but isVertMinMaxOk_C() rejects it
 */
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
{
    if (!isVertDC_C(src, stride, c))
        return 2;

    return isVertMinMaxOk_C(src, stride, c->QP) ? 1 : 0;
}
329

    
330
/**
 * Default horizontal deblocking filter (C version).
 *
 * For each of BLOCK_SIZE rows the step between dst[3] and dst[4] is reduced
 * when the "energy" across it is below 8*QP. Only dst[3] and dst[4] of a row
 * are ever modified; dst[0]..dst[7] are read.
 *
 * @param dst    first of the 8 pixels read in the first row
 * @param stride distance in bytes between two rows
 * @param c      context supplying QP
 */
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
{
    int y;

    for (y = 0; y < BLOCK_SIZE; y++, dst += stride) {
        const int middleEnergy = 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
        int leftEnergy, rightEnergy, q, d;

        if (FFABS(middleEnergy) >= 8*c->QP)
            continue;

        q           = (dst[3] - dst[4])/2;
        leftEnergy  = 5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
        rightEnergy = 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);

        /* correction magnitude, never negative */
        d = FFABS(middleEnergy) - FFMIN(FFABS(leftEnergy), FFABS(rightEnergy));
        d = FFMAX(d, 0);
        d = (5*d + 32) >> 6;
        d *= FFSIGN(-middleEnergy);

        /* limit the correction to the range between 0 and q */
        if (q > 0)
            d = FFMIN(FFMAX(d, 0), q);
        else
            d = FFMAX(FFMIN(d, 0), q);

        dst[3] -= d;
        dst[4] += d;
    }
}
364

    
365
/**
366
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
367
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
368
 */
369
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
370
{
371
    int y;
372
    for(y=0; y<BLOCK_SIZE; y++){
373
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
374
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
375

    
376
        int sums[10];
377
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
378
        sums[1] = sums[0] - first  + dst[3];
379
        sums[2] = sums[1] - first  + dst[4];
380
        sums[3] = sums[2] - first  + dst[5];
381
        sums[4] = sums[3] - first  + dst[6];
382
        sums[5] = sums[4] - dst[0] + dst[7];
383
        sums[6] = sums[5] - dst[1] + last;
384
        sums[7] = sums[6] - dst[2] + last;
385
        sums[8] = sums[7] - dst[3] + last;
386
        sums[9] = sums[8] - dst[4] + last;
387

    
388
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
389
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
390
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
391
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
392
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
393
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
394
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
395
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
396

    
397
        dst+= stride;
398
    }
399
}
400

    
401
/**
402
 * Experimental Filter 1 (Horizontal)
403
 * will not damage linear gradients
404
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
405
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
406
 * MMX2 version does correct clipping C version does not
407
 * not identical with the vertical one
408
 */
409
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
410
{
411
    int y;
412
    static uint64_t *lut= NULL;
413
    if(lut==NULL)
414
    {
415
        int i;
416
        lut = av_malloc(256*8);
417
        for(i=0; i<256; i++)
418
        {
419
            int v= i < 128 ? 2*i : 2*(i-256);
420
/*
421
//Simulate 112242211 9-Tap filter
422
            uint64_t a= (v/16)  & 0xFF;
423
            uint64_t b= (v/8)   & 0xFF;
424
            uint64_t c= (v/4)   & 0xFF;
425
            uint64_t d= (3*v/8) & 0xFF;
426
*/
427
//Simulate piecewise linear interpolation
428
            uint64_t a= (v/16)   & 0xFF;
429
            uint64_t b= (v*3/16) & 0xFF;
430
            uint64_t c= (v*5/16) & 0xFF;
431
            uint64_t d= (7*v/16) & 0xFF;
432
            uint64_t A= (0x100 - a)&0xFF;
433
            uint64_t B= (0x100 - b)&0xFF;
434
            uint64_t C= (0x100 - c)&0xFF;
435
            uint64_t D= (0x100 - c)&0xFF;
436

    
437
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
438
                       (D<<24) | (C<<16) | (B<<8)  | (A);
439
            //lut[i] = (v<<32) | (v<<24);
440
        }
441
    }
442

    
443
    for(y=0; y<BLOCK_SIZE; y++){
444
        int a= src[1] - src[2];
445
        int b= src[3] - src[4];
446
        int c= src[5] - src[6];
447

    
448
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
449

    
450
        if(d < QP){
451
            int v = d * FFSIGN(-b);
452

    
453
            src[1] +=v/8;
454
            src[2] +=v/4;
455
            src[3] +=3*v/8;
456
            src[4] -=3*v/8;
457
            src[5] -=v/4;
458
            src[6] -=v/8;
459
        }
460
        src+=stride;
461
    }
462
}
463

    
464
/**
465
 * accurate deblock filter
466
 */
467
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
468
    int y;
469
    const int QP= c->QP;
470
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
471
    const int dcThreshold= dcOffset*2 + 1;
472
//START_TIMER
473
    src+= step*4; // src points to begin of the 8x8 Block
474
    for(y=0; y<8; y++){
475
        int numEq= 0;
476

    
477
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
478
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
479
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
480
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
481
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
482
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
483
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
484
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
485
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
486
        if(numEq > c->ppMode.flatnessThreshold){
487
            int min, max, x;
488

    
489
            if(src[0] > src[step]){
490
                max= src[0];
491
                min= src[step];
492
            }else{
493
                max= src[step];
494
                min= src[0];
495
            }
496
            for(x=2; x<8; x+=2){
497
                if(src[x*step] > src[(x+1)*step]){
498
                        if(src[x    *step] > max) max= src[ x   *step];
499
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
500
                }else{
501
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
502
                        if(src[ x   *step] < min) min= src[ x   *step];
503
                }
504
            }
505
            if(max-min < 2*QP){
506
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
507
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
508

    
509
                int sums[10];
510
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
511
                sums[1] = sums[0] - first       + src[3*step];
512
                sums[2] = sums[1] - first       + src[4*step];
513
                sums[3] = sums[2] - first       + src[5*step];
514
                sums[4] = sums[3] - first       + src[6*step];
515
                sums[5] = sums[4] - src[0*step] + src[7*step];
516
                sums[6] = sums[5] - src[1*step] + last;
517
                sums[7] = sums[6] - src[2*step] + last;
518
                sums[8] = sums[7] - src[3*step] + last;
519
                sums[9] = sums[8] - src[4*step] + last;
520

    
521
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
522
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
523
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
524
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
525
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
526
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
527
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
528
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
529
            }
530
        }else{
531
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
532

    
533
            if(FFABS(middleEnergy) < 8*QP){
534
                const int q=(src[3*step] - src[4*step])/2;
535
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
536
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
537

    
538
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
539
                d= FFMAX(d, 0);
540

    
541
                d= (5*d + 32) >> 6;
542
                d*= FFSIGN(-middleEnergy);
543

    
544
                if(q>0){
545
                    d= d<0 ? 0 : d;
546
                    d= d>q ? q : d;
547
                }else{
548
                    d= d>0 ? 0 : d;
549
                    d= d<q ? q : d;
550
                }
551

    
552
                src[3*step]-= d;
553
                src[4*step]+= d;
554
            }
555
        }
556

    
557
        src += stride;
558
    }
559
/*if(step==16){
560
    STOP_TIMER("step16")
561
}else{
562
    STOP_TIMER("stepX")
563
}*/
564
}
565

    
566
//Note: there are C, MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
567
//Plain C versions
568
/* Step 1: decide which variants of the template have to be compiled. */
#if !(HAVE_MMX || HAVE_ALTIVEC) || CONFIG_RUNTIME_CPUDETECT
#    define COMPILE_C
#endif

#if HAVE_ALTIVEC
#    define COMPILE_ALTIVEC
#endif /* HAVE_ALTIVEC */

#if ARCH_X86
#    if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#        define COMPILE_MMX
#    endif
#    if HAVE_MMX2 || CONFIG_RUNTIME_CPUDETECT
#        define COMPILE_MMX2
#    endif
#    if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
#        define COMPILE_3DNOW
#    endif
#endif /* ARCH_X86 */

/* Step 2: switch every feature macro off; each variant below turns on
 * exactly the features it is meant to use before including the template. */
#undef  HAVE_MMX
#define HAVE_MMX 0
#undef  HAVE_MMX2
#define HAVE_MMX2 0
#undef  HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 0
#undef  HAVE_ALTIVEC
#define HAVE_ALTIVEC 0

/* Step 3: include the template once per variant; RENAME() appends the
 * variant suffix to the names it is applied to. */

/* plain C variant */
#if defined(COMPILE_C)
#define RENAME(a) a ## _C
#include "postprocess_template.c"
#endif

/* AltiVec variant */
#if defined(COMPILE_ALTIVEC)
#undef  RENAME
#undef  HAVE_ALTIVEC
#define HAVE_ALTIVEC 1
#define RENAME(a) a ## _altivec
#include "postprocess_altivec_template.c"
#include "postprocess_template.c"
#endif

/* MMX variant */
#if defined(COMPILE_MMX)
#undef  RENAME
#undef  HAVE_MMX
#define HAVE_MMX 1
#define RENAME(a) a ## _MMX
#include "postprocess_template.c"
#endif

/* MMX2 variant */
#if defined(COMPILE_MMX2)
#undef  RENAME
#undef  HAVE_MMX
#define HAVE_MMX 1
#undef  HAVE_MMX2
#define HAVE_MMX2 1
#define RENAME(a) a ## _MMX2
#include "postprocess_template.c"
#endif

/* 3DNow variant */
#if defined(COMPILE_3DNOW)
#undef  RENAME
#undef  HAVE_MMX
#define HAVE_MMX 1
#undef  HAVE_MMX2
#define HAVE_MMX2 0
#undef  HAVE_AMD3DNOW
#define HAVE_AMD3DNOW 1
#define RENAME(a) a ## _3DNow
#include "postprocess_template.c"
#endif
646

    
647
// minor note: the HAVE_xyz macros have been redefined by the template includes above and no longer reflect the configuration, so do not use them after this line.
648

    
649
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
650
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
651
{
652
    PPContext *c= (PPContext *)vc;
653
    PPMode *ppMode= (PPMode *)vm;
654
    c->ppMode= *ppMode; //FIXME
655

    
656
    // Using ifs here as they are faster than function pointers although the
657
    // difference would not be measurable here but it is much better because
658
    // someone might exchange the CPU whithout restarting MPlayer ;)
659
#if CONFIG_RUNTIME_CPUDETECT
660
#if ARCH_X86
661
    // ordered per speed fastest first
662
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
663
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
664
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
665
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
666
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
667
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
668
    else
669
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670
#else
671
#if HAVE_ALTIVEC
672
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
673
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674
    else
675
#endif
676
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
677
#endif
678
#else //CONFIG_RUNTIME_CPUDETECT
679
#if   HAVE_MMX2
680
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
681
#elif HAVE_AMD3DNOW
682
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
683
#elif HAVE_MMX
684
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
685
#elif HAVE_ALTIVEC
686
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
687
#else
688
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
689
#endif
690
#endif //!CONFIG_RUNTIME_CPUDETECT
691
}
692

    
693
//static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
694
//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
695

    
696
/* -pp Command line Help
697
*/
698
/*
 * Help text for the mode string. The names listed in the "short" and
 * "long name" columns must be spelled exactly like the entries of the
 * filters[] and replaceTable[] tables, because the mode-string parser
 * compares filter names with a case-sensitive strcmp().
 */
#if LIBPOSTPROC_VERSION_INT < (52<<16)
const char *const pp_help=
#else
const char pp_help[] =
#endif
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
745

    
746
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
747
{
748
    char temp[GET_MODE_BUFFER_SIZE];
749
    char *p= temp;
750
    static const char filterDelimiters[] = ",/";
751
    static const char optionDelimiters[] = ":";
752
    struct PPMode *ppMode;
753
    char *filterToken;
754

    
755
    ppMode= av_malloc(sizeof(PPMode));
756

    
757
    ppMode->lumMode= 0;
758
    ppMode->chromMode= 0;
759
    ppMode->maxTmpNoise[0]= 700;
760
    ppMode->maxTmpNoise[1]= 1500;
761
    ppMode->maxTmpNoise[2]= 3000;
762
    ppMode->maxAllowedY= 234;
763
    ppMode->minAllowedY= 16;
764
    ppMode->baseDcDiff= 256/8;
765
    ppMode->flatnessThreshold= 56-16-1;
766
    ppMode->maxClippedThreshold= 0.01;
767
    ppMode->error=0;
768

    
769
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
770

    
771
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
772

    
773
    for(;;){
774
        char *filterName;
775
        int q= 1000000; //PP_QUALITY_MAX;
776
        int chrom=-1;
777
        int luma=-1;
778
        char *option;
779
        char *options[OPTIONS_ARRAY_SIZE];
780
        int i;
781
        int filterNameOk=0;
782
        int numOfUnknownOptions=0;
783
        int enable=1; //does the user want us to enabled or disabled the filter
784

    
785
        filterToken= strtok(p, filterDelimiters);
786
        if(filterToken == NULL) break;
787
        p+= strlen(filterToken) + 1; // p points to next filterToken
788
        filterName= strtok(filterToken, optionDelimiters);
789
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
790

    
791
        if(*filterName == '-'){
792
            enable=0;
793
            filterName++;
794
        }
795

    
796
        for(;;){ //for all options
797
            option= strtok(NULL, optionDelimiters);
798
            if(option == NULL) break;
799

    
800
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
801
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
802
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
803
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
804
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
805
            else{
806
                options[numOfUnknownOptions] = option;
807
                numOfUnknownOptions++;
808
            }
809
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
810
        }
811
        options[numOfUnknownOptions] = NULL;
812

    
813
        /* replace stuff from the replace Table */
814
        for(i=0; replaceTable[2*i]!=NULL; i++){
815
            if(!strcmp(replaceTable[2*i], filterName)){
816
                int newlen= strlen(replaceTable[2*i + 1]);
817
                int plen;
818
                int spaceLeft;
819

    
820
                if(p==NULL) p= temp, *p=0;      //last filter
821
                else p--, *p=',';               //not last filter
822

    
823
                plen= strlen(p);
824
                spaceLeft= p - temp + plen;
825
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
826
                    ppMode->error++;
827
                    break;
828
                }
829
                memmove(p + newlen, p, plen+1);
830
                memcpy(p, replaceTable[2*i + 1], newlen);
831
                filterNameOk=1;
832
            }
833
        }
834

    
835
        for(i=0; filters[i].shortName!=NULL; i++){
836
            if(   !strcmp(filters[i].longName, filterName)
837
               || !strcmp(filters[i].shortName, filterName)){
838
                ppMode->lumMode &= ~filters[i].mask;
839
                ppMode->chromMode &= ~filters[i].mask;
840

    
841
                filterNameOk=1;
842
                if(!enable) break; // user wants to disable it
843

    
844
                if(q >= filters[i].minLumQuality && luma)
845
                    ppMode->lumMode|= filters[i].mask;
846
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847
                    if(q >= filters[i].minChromQuality)
848
                            ppMode->chromMode|= filters[i].mask;
849

    
850
                if(filters[i].mask == LEVEL_FIX){
851
                    int o;
852
                    ppMode->minAllowedY= 16;
853
                    ppMode->maxAllowedY= 234;
854
                    for(o=0; options[o]!=NULL; o++){
855
                        if(  !strcmp(options[o],"fullyrange")
856
                           ||!strcmp(options[o],"f")){
857
                            ppMode->minAllowedY= 0;
858
                            ppMode->maxAllowedY= 255;
859
                            numOfUnknownOptions--;
860
                        }
861
                    }
862
                }
863
                else if(filters[i].mask == TEMP_NOISE_FILTER)
864
                {
865
                    int o;
866
                    int numOfNoises=0;
867

    
868
                    for(o=0; options[o]!=NULL; o++){
869
                        char *tail;
870
                        ppMode->maxTmpNoise[numOfNoises]=
871
                            strtol(options[o], &tail, 0);
872
                        if(tail!=options[o]){
873
                            numOfNoises++;
874
                            numOfUnknownOptions--;
875
                            if(numOfNoises >= 3) break;
876
                        }
877
                    }
878
                }
879
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
880
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
881
                    int o;
882

    
883
                    for(o=0; options[o]!=NULL && o<2; o++){
884
                        char *tail;
885
                        int val= strtol(options[o], &tail, 0);
886
                        if(tail==options[o]) break;
887

    
888
                        numOfUnknownOptions--;
889
                        if(o==0) ppMode->baseDcDiff= val;
890
                        else ppMode->flatnessThreshold= val;
891
                    }
892
                }
893
                else if(filters[i].mask == FORCE_QUANT){
894
                    int o;
895
                    ppMode->forcedQuant= 15;
896

    
897
                    for(o=0; options[o]!=NULL && o<1; o++){
898
                        char *tail;
899
                        int val= strtol(options[o], &tail, 0);
900
                        if(tail==options[o]) break;
901

    
902
                        numOfUnknownOptions--;
903
                        ppMode->forcedQuant= val;
904
                    }
905
                }
906
            }
907
        }
908
        if(!filterNameOk) ppMode->error++;
909
        ppMode->error += numOfUnknownOptions;
910
    }
911

    
912
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
913
    if(ppMode->error){
914
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
915
        av_free(ppMode);
916
        return NULL;
917
    }
918
    return ppMode;
919
}
920

    
921
/**
 * Release a postprocessing mode.
 *
 * @param mode mode pointer to release; it is passed straight to av_free().
 */
void pp_free_mode(pp_mode *mode){
    av_free(mode);
}
924

    
925
/**
 * Replace the buffer referenced by *p with a new allocation.
 *
 * The old buffer is freed first, so its contents are NOT carried over;
 * the new buffer comes from av_mallocz(). *p ends up NULL if that
 * allocation fails.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment not used by this function
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size){
    av_free(*p);
    *p= av_mallocz(size);
}
929

    
930
/**
 * (Re)allocate every work buffer owned by a context for the given geometry.
 *
 * All buffers are replaced through reallocAlign(), so previous contents are
 * discarded. The function returns nothing; a failed allocation leaves the
 * corresponding pointer NULL.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line size used to dimension the temporary picture buffers;
 *                 stored in c->stride
 * @param qpStride line size used to dimension the QP tables;
 *                 stored in c->qpStride
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    int mbWidth = (width+15)>>4;
    int mbHeight= (height+15)>>4;
    /* Initial value of every histogram bin. Evaluated in 64 bits so that
     * width*height cannot overflow int for large pictures; the result is
     * identical to the int expression whenever that one does not overflow. */
    const int64_t histInit= (int64_t)width*height/64*15/256;
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));

    /* Do not write through a NULL pointer if the allocation above failed. */
    if(c->yHistogram){
        for(i=0; i<256; i++)
            c->yHistogram[i]= histInit;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so i do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
956

    
957
/**
 * Name callback stored in av_codec_context_class.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr) {
    return "postproc";
}
960

    
961
/* AVClass instance that pp_get_context() installs in PPContext.av_class.
 * Initialized positionally with the string "Postproc", the
 * context_to_name() callback and a NULL third member. */
static const AVClass av_codec_context_class = {
    "Postproc",
    context_to_name,
    NULL
};
962

    
963
pp_context *pp_get_context(int width, int height, int cpuCaps){
964
    PPContext *c= av_malloc(sizeof(PPContext));
965
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
966
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
967

    
968
    memset(c, 0, sizeof(PPContext));
969
    c->av_class = &av_codec_context_class;
970
    c->cpuCaps= cpuCaps;
971
    if(cpuCaps&PP_FORMAT){
972
        c->hChromaSubSample= cpuCaps&0x3;
973
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
974
    }else{
975
        c->hChromaSubSample= 1;
976
        c->vChromaSubSample= 1;
977
    }
978

    
979
    reallocBuffers(c, width, height, stride, qpStride);
980

    
981
    c->frameNum=-1;
982

    
983
    return c;
984
}
985

    
986
void pp_free_context(void *vc){
987
    PPContext *c = (PPContext*)vc;
988
    int i;
989

    
990
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
991
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
992

    
993
    av_free(c->tempBlocks);
994
    av_free(c->yHistogram);
995
    av_free(c->tempDst);
996
    av_free(c->tempSrc);
997
    av_free(c->deintTemp);
998
    av_free(c->stdQPTable);
999
    av_free(c->nonBQPTable);
1000
    av_free(c->forcedQPTable);
1001

    
1002
    memset(c, 0, sizeof(PPContext));
1003

    
1004
    av_free(c);
1005
}
1006

    
1007
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1008
                     uint8_t * dst[3], const int dstStride[3],
1009
                     int width, int height,
1010
                     const QP_STORE_T *QP_store,  int QPStride,
1011
                     pp_mode *vm,  void *vc, int pict_type)
1012
{
1013
    int mbWidth = (width+15)>>4;
1014
    int mbHeight= (height+15)>>4;
1015
    PPMode *mode = (PPMode*)vm;
1016
    PPContext *c = (PPContext*)vc;
1017
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1018
    int absQPStride = FFABS(QPStride);
1019

    
1020
    // c->stride and c->QPStride are always positive
1021
    if(c->stride < minStride || c->qpStride < absQPStride)
1022
        reallocBuffers(c, width, height,
1023
                       FFMAX(minStride, c->stride),
1024
                       FFMAX(c->qpStride, absQPStride));
1025

    
1026
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1027
        int i;
1028
        QP_store= c->forcedQPTable;
1029
        absQPStride = QPStride = 0;
1030
        if(mode->lumMode & FORCE_QUANT)
1031
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1032
        else
1033
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1034
    }
1035

    
1036
    if(pict_type & PP_PICT_TYPE_QP2){
1037
        int i;
1038
        const int count= mbHeight * absQPStride;
1039
        for(i=0; i<(count>>2); i++){
1040
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1041
        }
1042
        for(i<<=2; i<count; i++){
1043
            c->stdQPTable[i] = QP_store[i]>>1;
1044
        }
1045
        QP_store= c->stdQPTable;
1046
        QPStride= absQPStride;
1047
    }
1048

    
1049
    if(0){
1050
        int x,y;
1051
        for(y=0; y<mbHeight; y++){
1052
            for(x=0; x<mbWidth; x++){
1053
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1054
            }
1055
            av_log(c, AV_LOG_INFO, "\n");
1056
        }
1057
        av_log(c, AV_LOG_INFO, "\n");
1058
    }
1059

    
1060
    if((pict_type&7)!=3){
1061
        if (QPStride >= 0){
1062
            int i;
1063
            const int count= mbHeight * QPStride;
1064
            for(i=0; i<(count>>2); i++){
1065
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1066
            }
1067
            for(i<<=2; i<count; i++){
1068
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1069
            }
1070
        } else {
1071
            int i,j;
1072
            for(i=0; i<mbHeight; i++) {
1073
                for(j=0; j<absQPStride; j++) {
1074
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1075
                }
1076
            }
1077
        }
1078
    }
1079

    
1080
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1081
           mode->lumMode, mode->chromMode);
1082

    
1083
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1084
                width, height, QP_store, QPStride, 0, mode, c);
1085

    
1086
    width  = (width )>>c->hChromaSubSample;
1087
    height = (height)>>c->vChromaSubSample;
1088

    
1089
    if(mode->chromMode){
1090
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1091
                    width, height, QP_store, QPStride, 1, mode, c);
1092
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1093
                    width, height, QP_store, QPStride, 2, mode, c);
1094
    }
1095
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1096
        linecpy(dst[1], src[1], height, srcStride[1]);
1097
        linecpy(dst[2], src[2], height, srcStride[2]);
1098
    }else{
1099
        int y;
1100
        for(y=0; y<height; y++){
1101
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1102
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1103
        }
1104
    }
1105
}
1106