Revision 16e0bf73 libpostproc/postprocess.c

View differences:

libpostproc/postprocess.c
116 116

  
117 117
static struct PPFilter filters[]=
118 118
{
119
        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
120
        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
121
/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
122
        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
123
        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
124
        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
125
        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
126
        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
127
        {"dr", "dering",                1, 5, 6, DERING},
128
        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
129
        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
130
        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
131
        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
132
        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
133
        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
134
        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
135
        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
136
        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
137
        {NULL, NULL,0,0,0,0} //End Marker
119
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
120
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
121
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
122
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
123
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
124
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
125
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
126
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
127
    {"dr", "dering",                1, 5, 6, DERING},
128
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
129
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
130
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
131
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
132
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
133
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
134
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
135
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
136
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
137
    {NULL, NULL,0,0,0,0} //End Marker
138 138
};
139 139

  
140 140
static const char *replaceTable[]=
141 141
{
142
        "default",      "hb:a,vb:a,dr:a",
143
        "de",           "hb:a,vb:a,dr:a",
144
        "fast",         "h1:a,v1:a,dr:a",
145
        "fa",           "h1:a,v1:a,dr:a",
146
        "ac",           "ha:a:128:7,va:a,dr:a",
147
        NULL //End Marker
142
    "default",      "hb:a,vb:a,dr:a",
143
    "de",           "hb:a,vb:a,dr:a",
144
    "fast",         "h1:a,v1:a,dr:a",
145
    "fa",           "h1:a,v1:a,dr:a",
146
    "ac",           "ha:a:128:7,va:a,dr:a",
147
    NULL //End Marker
148 148
};
149 149

  
150 150

  
151 151
#if defined(ARCH_X86)
152 152
static inline void prefetchnta(void *p)
153 153
{
154
        asm volatile(   "prefetchnta (%0)\n\t"
155
                : : "r" (p)
156
        );
154
    asm volatile(   "prefetchnta (%0)\n\t"
155
        : : "r" (p)
156
    );
157 157
}
158 158

  
159 159
static inline void prefetcht0(void *p)
160 160
{
161
        asm volatile(   "prefetcht0 (%0)\n\t"
162
                : : "r" (p)
163
        );
161
    asm volatile(   "prefetcht0 (%0)\n\t"
162
        : : "r" (p)
163
    );
164 164
}
165 165

  
166 166
static inline void prefetcht1(void *p)
167 167
{
168
        asm volatile(   "prefetcht1 (%0)\n\t"
169
                : : "r" (p)
170
        );
168
    asm volatile(   "prefetcht1 (%0)\n\t"
169
        : : "r" (p)
170
    );
171 171
}
172 172

  
173 173
static inline void prefetcht2(void *p)
174 174
{
175
        asm volatile(   "prefetcht2 (%0)\n\t"
176
                : : "r" (p)
177
        );
175
    asm volatile(   "prefetcht2 (%0)\n\t"
176
        : : "r" (p)
177
    );
178 178
}
179 179
#endif
180 180

  
......
185 185
 */
186 186
static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
187 187
{
188
        int numEq= 0;
189
        int y;
190
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
191
        const int dcThreshold= dcOffset*2 + 1;
192

  
193
        for(y=0; y<BLOCK_SIZE; y++)
194
        {
195
                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
196
                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
197
                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
198
                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
199
                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
200
                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
201
                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
202
                src+= stride;
203
        }
204
        return numEq > c->ppMode.flatnessThreshold;
188
    int numEq= 0;
189
    int y;
190
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
191
    const int dcThreshold= dcOffset*2 + 1;
192

  
193
    for(y=0; y<BLOCK_SIZE; y++){
194
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
195
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
196
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
197
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
198
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
199
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
200
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
201
        src+= stride;
202
    }
203
    return numEq > c->ppMode.flatnessThreshold;
205 204
}
206 205

  
207 206
/**
208 207
 * Check if the middle 8x8 Block in the given 8x16 block is flat
209 208
 */
210
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
211
        int numEq= 0;
212
        int y;
213
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
214
        const int dcThreshold= dcOffset*2 + 1;
215

  
216
        src+= stride*4; // src points to begin of the 8x8 Block
217
        for(y=0; y<BLOCK_SIZE-1; y++)
218
        {
219
                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
220
                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
221
                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
222
                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
223
                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
224
                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
225
                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
226
                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
227
                src+= stride;
228
        }
229
        return numEq > c->ppMode.flatnessThreshold;
209
static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c)
210
{
211
    int numEq= 0;
212
    int y;
213
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
214
    const int dcThreshold= dcOffset*2 + 1;
215

  
216
    src+= stride*4; // src points to begin of the 8x8 Block
217
    for(y=0; y<BLOCK_SIZE-1; y++){
218
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
219
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
220
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
221
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
222
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
223
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
224
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
225
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
226
        src+= stride;
227
    }
228
    return numEq > c->ppMode.flatnessThreshold;
230 229
}
231 230

  
232 231
static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
233 232
{
234
        int i;
233
    int i;
235 234
#if 1
236
        for(i=0; i<2; i++){
237
                if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
238
                src += stride;
239
                if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
240
                src += stride;
241
                if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
242
                src += stride;
243
                if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
244
                src += stride;
245
        }
235
    for(i=0; i<2; i++){
236
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
237
        src += stride;
238
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
239
        src += stride;
240
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
241
        src += stride;
242
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
243
        src += stride;
244
    }
246 245
#else
247
        for(i=0; i<8; i++){
248
                if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
249
                src += stride;
250
        }
246
    for(i=0; i<8; i++){
247
        if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
248
        src += stride;
249
    }
251 250
#endif
252
        return 1;
251
    return 1;
253 252
}
254 253

  
255 254
static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
256 255
{
257 256
#if 1
258 257
#if 1
259
        int x;
260
        src+= stride*4;
261
        for(x=0; x<BLOCK_SIZE; x+=4)
262
        {
263
                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
264
                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
265
                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
266
                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
267
        }
258
    int x;
259
    src+= stride*4;
260
    for(x=0; x<BLOCK_SIZE; x+=4){
261
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
262
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
263
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
264
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
265
    }
268 266
#else
269
        int x;
270
        src+= stride*3;
271
        for(x=0; x<BLOCK_SIZE; x++)
272
        {
273
                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
274
        }
267
    int x;
268
    src+= stride*3;
269
    for(x=0; x<BLOCK_SIZE; x++){
270
        if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
271
    }
275 272
#endif
276
        return 1;
273
    return 1;
277 274
#else
278
        int x;
279
        src+= stride*4;
280
        for(x=0; x<BLOCK_SIZE; x++)
281
        {
282
                int min=255;
283
                int max=0;
284
                int y;
285
                for(y=0; y<8; y++){
286
                        int v= src[x + y*stride];
287
                        if(v>max) max=v;
288
                        if(v<min) min=v;
289
                }
290
                if(max-min > 2*QP) return 0;
275
    int x;
276
    src+= stride*4;
277
    for(x=0; x<BLOCK_SIZE; x++){
278
        int min=255;
279
        int max=0;
280
        int y;
281
        for(y=0; y<8; y++){
282
            int v= src[x + y*stride];
283
            if(v>max) max=v;
284
            if(v<min) min=v;
291 285
        }
292
        return 1;
286
        if(max-min > 2*QP) return 0;
287
    }
288
    return 1;
293 289
#endif
294 290
}
295 291

  
296
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
297
        if( isHorizDC_C(src, stride, c) ){
298
                if( isHorizMinMaxOk_C(src, stride, c->QP) )
299
                        return 1;
300
                else
301
                        return 0;
302
        }else{
303
                return 2;
304
        }
292
static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c)
293
{
294
    if( isHorizDC_C(src, stride, c) ){
295
        if( isHorizMinMaxOk_C(src, stride, c->QP) )
296
            return 1;
297
        else
298
            return 0;
299
    }else{
300
        return 2;
301
    }
305 302
}
306 303

  
307
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
308
        if( isVertDC_C(src, stride, c) ){
309
                if( isVertMinMaxOk_C(src, stride, c->QP) )
310
                        return 1;
311
                else
312
                        return 0;
313
        }else{
314
                return 2;
315
        }
304
static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c)
305
{
306
    if( isVertDC_C(src, stride, c) ){
307
        if( isVertMinMaxOk_C(src, stride, c->QP) )
308
            return 1;
309
        else
310
            return 0;
311
    }else{
312
        return 2;
313
    }
316 314
}
317 315

  
318 316
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
319 317
{
320
        int y;
321
        for(y=0; y<BLOCK_SIZE; y++)
322
        {
323
                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
324

  
325
                if(FFABS(middleEnergy) < 8*c->QP)
326
                {
327
                        const int q=(dst[3] - dst[4])/2;
328
                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
329
                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
330

  
331
                        int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
332
                        d= FFMAX(d, 0);
333

  
334
                        d= (5*d + 32) >> 6;
335
                        d*= FFSIGN(-middleEnergy);
336

  
337
                        if(q>0)
338
                        {
339
                                d= d<0 ? 0 : d;
340
                                d= d>q ? q : d;
341
                        }
342
                        else
343
                        {
344
                                d= d>0 ? 0 : d;
345
                                d= d<q ? q : d;
346
                        }
347

  
348
                        dst[3]-= d;
349
                        dst[4]+= d;
350
                }
351
                dst+= stride;
318
    int y;
319
    for(y=0; y<BLOCK_SIZE; y++){
320
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
321

  
322
        if(FFABS(middleEnergy) < 8*c->QP){
323
            const int q=(dst[3] - dst[4])/2;
324
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
325
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
326

  
327
            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
328
            d= FFMAX(d, 0);
329

  
330
            d= (5*d + 32) >> 6;
331
            d*= FFSIGN(-middleEnergy);
332

  
333
            if(q>0)
334
            {
335
                d= d<0 ? 0 : d;
336
                d= d>q ? q : d;
337
            }
338
            else
339
            {
340
                d= d>0 ? 0 : d;
341
                d= d<q ? q : d;
342
            }
343

  
344
            dst[3]-= d;
345
            dst[4]+= d;
352 346
        }
347
        dst+= stride;
348
    }
353 349
}
354 350

  
355 351
/**
......
358 354
 */
359 355
static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
360 356
{
361
        int y;
362
        for(y=0; y<BLOCK_SIZE; y++)
363
        {
364
                const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
365
                const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
366

  
367
                int sums[10];
368
                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
369
                sums[1] = sums[0] - first  + dst[3];
370
                sums[2] = sums[1] - first  + dst[4];
371
                sums[3] = sums[2] - first  + dst[5];
372
                sums[4] = sums[3] - first  + dst[6];
373
                sums[5] = sums[4] - dst[0] + dst[7];
374
                sums[6] = sums[5] - dst[1] + last;
375
                sums[7] = sums[6] - dst[2] + last;
376
                sums[8] = sums[7] - dst[3] + last;
377
                sums[9] = sums[8] - dst[4] + last;
378

  
379
                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
380
                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
381
                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
382
                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
383
                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
384
                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
385
                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
386
                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
387

  
388
                dst+= stride;
389
        }
357
    int y;
358
    for(y=0; y<BLOCK_SIZE; y++){
359
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
360
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
361

  
362
        int sums[10];
363
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
364
        sums[1] = sums[0] - first  + dst[3];
365
        sums[2] = sums[1] - first  + dst[4];
366
        sums[3] = sums[2] - first  + dst[5];
367
        sums[4] = sums[3] - first  + dst[6];
368
        sums[5] = sums[4] - dst[0] + dst[7];
369
        sums[6] = sums[5] - dst[1] + last;
370
        sums[7] = sums[6] - dst[2] + last;
371
        sums[8] = sums[7] - dst[3] + last;
372
        sums[9] = sums[8] - dst[4] + last;
373

  
374
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
375
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
376
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
377
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
378
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
379
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
380
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
381
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
382

  
383
        dst+= stride;
384
    }
390 385
}
391 386

  
392 387
/**
......
399 394
 */
400 395
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
401 396
{
402
        int y;
403
        static uint64_t *lut= NULL;
404
        if(lut==NULL)
397
    int y;
398
    static uint64_t *lut= NULL;
399
    if(lut==NULL)
400
    {
401
        int i;
402
        lut = av_malloc(256*8);
403
        for(i=0; i<256; i++)
405 404
        {
406
                int i;
407
                lut = av_malloc(256*8);
408
                for(i=0; i<256; i++)
409
                {
410
                        int v= i < 128 ? 2*i : 2*(i-256);
405
            int v= i < 128 ? 2*i : 2*(i-256);
411 406
/*
412 407
//Simulate 112242211 9-Tap filter
413
                        uint64_t a= (v/16) & 0xFF;
414
                        uint64_t b= (v/8) & 0xFF;
415
                        uint64_t c= (v/4) & 0xFF;
416
                        uint64_t d= (3*v/8) & 0xFF;
408
            uint64_t a= (v/16)  & 0xFF;
409
            uint64_t b= (v/8)   & 0xFF;
410
            uint64_t c= (v/4)   & 0xFF;
411
            uint64_t d= (3*v/8) & 0xFF;
417 412
*/
418 413
//Simulate piecewise linear interpolation
419
                        uint64_t a= (v/16) & 0xFF;
420
                        uint64_t b= (v*3/16) & 0xFF;
421
                        uint64_t c= (v*5/16) & 0xFF;
422
                        uint64_t d= (7*v/16) & 0xFF;
423
                        uint64_t A= (0x100 - a)&0xFF;
424
                        uint64_t B= (0x100 - b)&0xFF;
425
                        uint64_t C= (0x100 - c)&0xFF;
426
                        uint64_t D= (0x100 - c)&0xFF;
427

  
428
                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
429
                                (D<<24) | (C<<16) | (B<<8) | (A);
430
                        //lut[i] = (v<<32) | (v<<24);
431
                }
414
            uint64_t a= (v/16)   & 0xFF;
415
            uint64_t b= (v*3/16) & 0xFF;
416
            uint64_t c= (v*5/16) & 0xFF;
417
            uint64_t d= (7*v/16) & 0xFF;
418
            uint64_t A= (0x100 - a)&0xFF;
419
            uint64_t B= (0x100 - b)&0xFF;
420
            uint64_t C= (0x100 - c)&0xFF;
421
            uint64_t D= (0x100 - c)&0xFF;
422

  
423
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
424
                       (D<<24) | (C<<16) | (B<<8)  | (A);
425
            //lut[i] = (v<<32) | (v<<24);
432 426
        }
427
    }
433 428

  
434
        for(y=0; y<BLOCK_SIZE; y++)
435
        {
436
                int a= src[1] - src[2];
437
                int b= src[3] - src[4];
438
                int c= src[5] - src[6];
439

  
440
                int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
429
    for(y=0; y<BLOCK_SIZE; y++){
430
        int a= src[1] - src[2];
431
        int b= src[3] - src[4];
432
        int c= src[5] - src[6];
441 433

  
442
                if(d < QP)
443
                {
444
                        int v = d * FFSIGN(-b);
434
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
445 435

  
446
                        src[1] +=v/8;
447
                        src[2] +=v/4;
448
                        src[3] +=3*v/8;
449
                        src[4] -=3*v/8;
450
                        src[5] -=v/4;
451
                        src[6] -=v/8;
436
        if(d < QP){
437
            int v = d * FFSIGN(-b);
452 438

  
453
                }
454
                src+=stride;
439
            src[1] +=v/8;
440
            src[2] +=v/4;
441
            src[3] +=3*v/8;
442
            src[4] -=3*v/8;
443
            src[5] -=v/4;
444
            src[6] -=v/8;
455 445
        }
446
        src+=stride;
447
    }
456 448
}
457 449

  
458 450
/**
459 451
 * accurate deblock filter
460 452
 */
461 453
static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
462
        int y;
463
        const int QP= c->QP;
464
        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
465
        const int dcThreshold= dcOffset*2 + 1;
454
    int y;
455
    const int QP= c->QP;
456
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
457
    const int dcThreshold= dcOffset*2 + 1;
466 458
//START_TIMER
467
        src+= step*4; // src points to begin of the 8x8 Block
468
        for(y=0; y<8; y++){
469
                int numEq= 0;
470

  
471
                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
472
                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
473
                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
474
                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
475
                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
476
                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
477
                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
478
                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
479
                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
480
                if(numEq > c->ppMode.flatnessThreshold){
481
                        int min, max, x;
482

  
483
                        if(src[0] > src[step]){
484
                            max= src[0];
485
                            min= src[step];
486
                        }else{
487
                            max= src[step];
488
                            min= src[0];
489
                        }
490
                        for(x=2; x<8; x+=2){
491
                                if(src[x*step] > src[(x+1)*step]){
492
                                        if(src[x    *step] > max) max= src[ x   *step];
493
                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
494
                                }else{
495
                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
496
                                        if(src[ x   *step] < min) min= src[ x   *step];
497
                                }
498
                        }
499
                        if(max-min < 2*QP){
500
                                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
501
                                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
502

  
503
                                int sums[10];
504
                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
505
                                sums[1] = sums[0] - first       + src[3*step];
506
                                sums[2] = sums[1] - first       + src[4*step];
507
                                sums[3] = sums[2] - first       + src[5*step];
508
                                sums[4] = sums[3] - first       + src[6*step];
509
                                sums[5] = sums[4] - src[0*step] + src[7*step];
510
                                sums[6] = sums[5] - src[1*step] + last;
511
                                sums[7] = sums[6] - src[2*step] + last;
512
                                sums[8] = sums[7] - src[3*step] + last;
513
                                sums[9] = sums[8] - src[4*step] + last;
514

  
515
                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
516
                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
517
                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
518
                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
519
                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
520
                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
521
                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
522
                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
523
                        }
459
    src+= step*4; // src points to begin of the 8x8 Block
460
    for(y=0; y<8; y++){
461
        int numEq= 0;
462

  
463
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
464
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
465
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
466
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
467
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
468
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
469
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
470
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
471
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
472
        if(numEq > c->ppMode.flatnessThreshold){
473
            int min, max, x;
474

  
475
            if(src[0] > src[step]){
476
                max= src[0];
477
                min= src[step];
478
            }else{
479
                max= src[step];
480
                min= src[0];
481
            }
482
            for(x=2; x<8; x+=2){
483
                if(src[x*step] > src[(x+1)*step]){
484
                        if(src[x    *step] > max) max= src[ x   *step];
485
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
524 486
                }else{
525
                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
526

  
527
                        if(FFABS(middleEnergy) < 8*QP)
528
                        {
529
                                const int q=(src[3*step] - src[4*step])/2;
530
                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
531
                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
532

  
533
                                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
534
                                d= FFMAX(d, 0);
535

  
536
                                d= (5*d + 32) >> 6;
537
                                d*= FFSIGN(-middleEnergy);
538

  
539
                                if(q>0)
540
                                {
541
                                        d= d<0 ? 0 : d;
542
                                        d= d>q ? q : d;
543
                                }
544
                                else
545
                                {
546
                                        d= d>0 ? 0 : d;
547
                                        d= d<q ? q : d;
548
                                }
549

  
550
                                src[3*step]-= d;
551
                                src[4*step]+= d;
552
                        }
487
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
488
                        if(src[ x   *step] < min) min= src[ x   *step];
489
                }
490
            }
491
            if(max-min < 2*QP){
492
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
493
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
494

  
495
                int sums[10];
496
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
497
                sums[1] = sums[0] - first       + src[3*step];
498
                sums[2] = sums[1] - first       + src[4*step];
499
                sums[3] = sums[2] - first       + src[5*step];
500
                sums[4] = sums[3] - first       + src[6*step];
501
                sums[5] = sums[4] - src[0*step] + src[7*step];
502
                sums[6] = sums[5] - src[1*step] + last;
503
                sums[7] = sums[6] - src[2*step] + last;
504
                sums[8] = sums[7] - src[3*step] + last;
505
                sums[9] = sums[8] - src[4*step] + last;
506

  
507
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
508
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
509
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
510
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
511
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
512
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
513
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
514
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
515
            }
516
        }else{
517
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
518

  
519
            if(FFABS(middleEnergy) < 8*QP){
520
                const int q=(src[3*step] - src[4*step])/2;
521
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
522
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
523

  
524
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
525
                d= FFMAX(d, 0);
526

  
527
                d= (5*d + 32) >> 6;
528
                d*= FFSIGN(-middleEnergy);
529

  
530
                if(q>0){
531
                    d= d<0 ? 0 : d;
532
                    d= d>q ? q : d;
533
                }else{
534
                    d= d>0 ? 0 : d;
535
                    d= d<q ? q : d;
553 536
                }
554 537

  
555
                src += stride;
538
                src[3*step]-= d;
539
                src[4*step]+= d;
540
            }
556 541
        }
542

  
543
        src += stride;
544
    }
557 545
/*if(step==16){
558 546
    STOP_TIMER("step16")
559 547
}else{
......
642 630
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
643 631
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
644 632
{
645
        PPContext *c= (PPContext *)vc;
646
        PPMode *ppMode= (PPMode *)vm;
647
        c->ppMode= *ppMode; //FIXME
633
    PPContext *c= (PPContext *)vc;
634
    PPMode *ppMode= (PPMode *)vm;
635
    c->ppMode= *ppMode; //FIXME
648 636

  
649
        // Using ifs here as they are faster than function pointers although the
650
        // difference would not be measurable here but it is much better because
651
        // someone might exchange the CPU without restarting MPlayer ;)
637
    // Using ifs here as they are faster than function pointers although the
638
    // difference would not be measurable here but it is much better because
639
    // someone might exchange the CPU without restarting MPlayer ;)
652 640
#ifdef RUNTIME_CPUDETECT
653 641
#if defined(ARCH_X86)
654
        // ordered per speed fastest first
655
        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
656
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657
        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
658
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
659
        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
660
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661
        else
662
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
642
    // ordered per speed fastest first
643
    if(c->cpuCaps & PP_CPU_CAPS_MMX2)
644
        postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
645
    else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
646
        postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
647
    else if(c->cpuCaps & PP_CPU_CAPS_MMX)
648
        postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
649
    else
650
        postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663 651
#else
664 652
#ifdef HAVE_ALTIVEC
665
        if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
666
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667
        else
653
    if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
654
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
655
    else
668 656
#endif
669
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
657
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
670 658
#endif
671 659
#else //RUNTIME_CPUDETECT
672 660
#ifdef HAVE_MMX2
673
                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
661
            postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
674 662
#elif defined (HAVE_3DNOW)
675
                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
663
            postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
676 664
#elif defined (HAVE_MMX)
677
                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
665
            postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
678 666
#elif defined (HAVE_ALTIVEC)
679
                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
667
            postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
680 668
#else
681
                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
669
            postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
682 670
#endif
683 671
#endif //!RUNTIME_CPUDETECT
684 672
}
......
738 726

  
739 727
pp_mode_t *pp_get_mode_by_name_and_quality(const char *name, int quality)
740 728
{
741
        char temp[GET_MODE_BUFFER_SIZE];
742
        char *p= temp;
743
        static const char filterDelimiters[] = ",/";
744
        static const char optionDelimiters[] = ":";
745
        struct PPMode *ppMode;
746
        char *filterToken;
747

  
748
        ppMode= av_malloc(sizeof(PPMode));
749

  
750
        ppMode->lumMode= 0;
751
        ppMode->chromMode= 0;
752
        ppMode->maxTmpNoise[0]= 700;
753
        ppMode->maxTmpNoise[1]= 1500;
754
        ppMode->maxTmpNoise[2]= 3000;
755
        ppMode->maxAllowedY= 234;
756
        ppMode->minAllowedY= 16;
757
        ppMode->baseDcDiff= 256/8;
758
        ppMode->flatnessThreshold= 56-16-1;
759
        ppMode->maxClippedThreshold= 0.01;
760
        ppMode->error=0;
761

  
762
        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
763

  
764
        av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
765

  
766
        for(;;){
767
                char *filterName;
768
                int q= 1000000; //PP_QUALITY_MAX;
769
                int chrom=-1;
770
                int luma=-1;
771
                char *option;
772
                char *options[OPTIONS_ARRAY_SIZE];
773
                int i;
774
                int filterNameOk=0;
775
                int numOfUnknownOptions=0;
776
                int enable=1; //does the user want us to enable or disable the filter
777

  
778
                filterToken= strtok(p, filterDelimiters);
779
                if(filterToken == NULL) break;
780
                p+= strlen(filterToken) + 1; // p points to next filterToken
781
                filterName= strtok(filterToken, optionDelimiters);
782
                av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
783

  
784
                if(*filterName == '-')
785
                {
786
                        enable=0;
787
                        filterName++;
788
                }
729
    char temp[GET_MODE_BUFFER_SIZE];
730
    char *p= temp;
731
    static const char filterDelimiters[] = ",/";
732
    static const char optionDelimiters[] = ":";
733
    struct PPMode *ppMode;
734
    char *filterToken;
735

  
736
    ppMode= av_malloc(sizeof(PPMode));
737

  
738
    ppMode->lumMode= 0;
739
    ppMode->chromMode= 0;
740
    ppMode->maxTmpNoise[0]= 700;
741
    ppMode->maxTmpNoise[1]= 1500;
742
    ppMode->maxTmpNoise[2]= 3000;
743
    ppMode->maxAllowedY= 234;
744
    ppMode->minAllowedY= 16;
745
    ppMode->baseDcDiff= 256/8;
746
    ppMode->flatnessThreshold= 56-16-1;
747
    ppMode->maxClippedThreshold= 0.01;
748
    ppMode->error=0;
749

  
750
    strncpy(temp, name, GET_MODE_BUFFER_SIZE);
751

  
752
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
753

  
754
    for(;;){
755
        char *filterName;
756
        int q= 1000000; //PP_QUALITY_MAX;
757
        int chrom=-1;
758
        int luma=-1;
759
        char *option;
760
        char *options[OPTIONS_ARRAY_SIZE];
761
        int i;
762
        int filterNameOk=0;
763
        int numOfUnknownOptions=0;
764
        int enable=1; //does the user want us to enable or disable the filter
765

  
766
        filterToken= strtok(p, filterDelimiters);
767
        if(filterToken == NULL) break;
768
        p+= strlen(filterToken) + 1; // p points to next filterToken
769
        filterName= strtok(filterToken, optionDelimiters);
770
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
771

  
772
        if(*filterName == '-'){
773
            enable=0;
774
            filterName++;
775
        }
789 776

  
790
                for(;;){ //for all options
791
                        option= strtok(NULL, optionDelimiters);
792
                        if(option == NULL) break;
793

  
794
                        av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
795
                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
796
                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
797
                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
798
                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
799
                        else
800
                        {
801
                                options[numOfUnknownOptions] = option;
802
                                numOfUnknownOptions++;
803
                        }
804
                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
777
        for(;;){ //for all options
778
            option= strtok(NULL, optionDelimiters);
779
            if(option == NULL) break;
780

  
781
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
782
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
783
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
784
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
785
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
786
            else{
787
                options[numOfUnknownOptions] = option;
788
                numOfUnknownOptions++;
789
            }
790
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
791
        }
792
        options[numOfUnknownOptions] = NULL;
793

  
794
        /* replace stuff from the replace Table */
795
        for(i=0; replaceTable[2*i]!=NULL; i++){
796
            if(!strcmp(replaceTable[2*i], filterName)){
797
                int newlen= strlen(replaceTable[2*i + 1]);
798
                int plen;
799
                int spaceLeft;
800

  
801
                if(p==NULL) p= temp, *p=0;      //last filter
802
                else p--, *p=',';               //not last filter
803

  
804
                plen= strlen(p);
805
                spaceLeft= p - temp + plen;
806
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE){
807
                    ppMode->error++;
808
                    break;
805 809
                }
806
                options[numOfUnknownOptions] = NULL;
810
                memmove(p + newlen, p, plen+1);
811
                memcpy(p, replaceTable[2*i + 1], newlen);
812
                filterNameOk=1;
813
            }
814
        }
807 815

  
808
                /* replace stuff from the replace Table */
809
                for(i=0; replaceTable[2*i]!=NULL; i++)
810
                {
811
                        if(!strcmp(replaceTable[2*i], filterName))
812
                        {
813
                                int newlen= strlen(replaceTable[2*i + 1]);
814
                                int plen;
815
                                int spaceLeft;
816

  
817
                                if(p==NULL) p= temp, *p=0;      //last filter
818
                                else p--, *p=',';               //not last filter
819

  
820
                                plen= strlen(p);
821
                                spaceLeft= p - temp + plen;
822
                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
823
                                {
824
                                        ppMode->error++;
825
                                        break;
826
                                }
827
                                memmove(p + newlen, p, plen+1);
828
                                memcpy(p, replaceTable[2*i + 1], newlen);
829
                                filterNameOk=1;
816
        for(i=0; filters[i].shortName!=NULL; i++){
817
            if(   !strcmp(filters[i].longName, filterName)
818
               || !strcmp(filters[i].shortName, filterName)){
819
                ppMode->lumMode &= ~filters[i].mask;
820
                ppMode->chromMode &= ~filters[i].mask;
821

  
822
                filterNameOk=1;
823
                if(!enable) break; // user wants to disable it
824

  
825
                if(q >= filters[i].minLumQuality && luma)
826
                    ppMode->lumMode|= filters[i].mask;
827
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
828
                    if(q >= filters[i].minChromQuality)
829
                            ppMode->chromMode|= filters[i].mask;
830

  
831
                if(filters[i].mask == LEVEL_FIX){
832
                    int o;
833
                    ppMode->minAllowedY= 16;
834
                    ppMode->maxAllowedY= 234;
835
                    for(o=0; options[o]!=NULL; o++){
836
                        if(  !strcmp(options[o],"fullyrange")
837
                           ||!strcmp(options[o],"f")){
838
                            ppMode->minAllowedY= 0;
839
                            ppMode->maxAllowedY= 255;
840
                            numOfUnknownOptions--;
830 841
                        }
842
                    }
831 843
                }
832

  
833
                for(i=0; filters[i].shortName!=NULL; i++)
844
                else if(filters[i].mask == TEMP_NOISE_FILTER)
834 845
                {
835
                        if(   !strcmp(filters[i].longName, filterName)
836
                           || !strcmp(filters[i].shortName, filterName))
837
                        {
838
                                ppMode->lumMode &= ~filters[i].mask;
839
                                ppMode->chromMode &= ~filters[i].mask;
840

  
841
                                filterNameOk=1;
842
                                if(!enable) break; // user wants to disable it
843

  
844
                                if(q >= filters[i].minLumQuality && luma)
845
                                        ppMode->lumMode|= filters[i].mask;
846
                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
847
                                        if(q >= filters[i].minChromQuality)
848
                                                ppMode->chromMode|= filters[i].mask;
849

  
850
                                if(filters[i].mask == LEVEL_FIX)
851
                                {
852
                                        int o;
853
                                        ppMode->minAllowedY= 16;
854
                                        ppMode->maxAllowedY= 234;
855
                                        for(o=0; options[o]!=NULL; o++)
856
                                        {
857
                                                if(  !strcmp(options[o],"fullyrange")
858
                                                   ||!strcmp(options[o],"f"))
859
                                                {
860
                                                        ppMode->minAllowedY= 0;
861
                                                        ppMode->maxAllowedY= 255;
862
                                                        numOfUnknownOptions--;
863
                                                }
864
                                        }
865
                                }
866
                                else if(filters[i].mask == TEMP_NOISE_FILTER)
867
                                {
868
                                        int o;
869
                                        int numOfNoises=0;
870

  
871
                                        for(o=0; options[o]!=NULL; o++)
872
                                        {
873
                                                char *tail;
874
                                                ppMode->maxTmpNoise[numOfNoises]=
875
                                                        strtol(options[o], &tail, 0);
876
                                                if(tail!=options[o])
877
                                                {
878
                                                        numOfNoises++;
879
                                                        numOfUnknownOptions--;
880
                                                        if(numOfNoises >= 3) break;
881
                                                }
882
                                        }
883
                                }
884
                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
885
                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
886
                                {
887
                                        int o;
888

  
889
                                        for(o=0; options[o]!=NULL && o<2; o++)
890
                                        {
891
                                                char *tail;
892
                                                int val= strtol(options[o], &tail, 0);
893
                                                if(tail==options[o]) break;
894

  
895
                                                numOfUnknownOptions--;
896
                                                if(o==0) ppMode->baseDcDiff= val;
897
                                                else ppMode->flatnessThreshold= val;
898
                                        }
899
                                }
900
                                else if(filters[i].mask == FORCE_QUANT)
901
                                {
902
                                        int o;
903
                                        ppMode->forcedQuant= 15;
904

  
905
                                        for(o=0; options[o]!=NULL && o<1; o++)
906
                                        {
907
                                                char *tail;
908
                                                int val= strtol(options[o], &tail, 0);
909
                                                if(tail==options[o]) break;
910

  
911
                                                numOfUnknownOptions--;
912
                                                ppMode->forcedQuant= val;
913
                                        }
914
                                }
846
                    int o;
847
                    int numOfNoises=0;
848

  
849
                    for(o=0; options[o]!=NULL; o++){
850
                        char *tail;
851
                        ppMode->maxTmpNoise[numOfNoises]=
852
                            strtol(options[o], &tail, 0);
853
                        if(tail!=options[o]){
854
                            numOfNoises++;
855
                            numOfUnknownOptions--;
856
                            if(numOfNoises >= 3) break;
915 857
                        }
858
                    }
916 859
                }
917
                if(!filterNameOk) ppMode->error++;
918
                ppMode->error += numOfUnknownOptions;
919
        }
920

  
921
        av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
922
        if(ppMode->error)
923
        {
924
                av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
925
                av_free(ppMode);
926
                return NULL;
860
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
861
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
862
                    int o;
863

  
864
                    for(o=0; options[o]!=NULL && o<2; o++){
865
                        char *tail;
866
                        int val= strtol(options[o], &tail, 0);
867
                        if(tail==options[o]) break;
868

  
869
                        numOfUnknownOptions--;
870
                        if(o==0) ppMode->baseDcDiff= val;
871
                        else ppMode->flatnessThreshold= val;
872
                    }
873
                }
874
                else if(filters[i].mask == FORCE_QUANT){
875
                    int o;
876
                    ppMode->forcedQuant= 15;
877

  
878
                    for(o=0; options[o]!=NULL && o<1; o++){
879
                        char *tail;
880
                        int val= strtol(options[o], &tail, 0);
881
                        if(tail==options[o]) break;
882

  
883
                        numOfUnknownOptions--;
884
                        ppMode->forcedQuant= val;
885
                    }
886
                }
887
            }
927 888
        }
928
        return ppMode;
889
        if(!filterNameOk) ppMode->error++;
890
        ppMode->error += numOfUnknownOptions;
891
    }
892

  
893
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
894
    if(ppMode->error){
895
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
896
        av_free(ppMode);
897
        return NULL;
898
    }
899
    return ppMode;
929 900
}
930 901

  
931 902
void pp_free_mode(pp_mode_t *mode){
......
933 904
}
934 905

  
935 906
static void reallocAlign(void **p, int alignment, int size){
936
        av_free(*p);
937
        *p= av_mallocz(size);
907
    av_free(*p);
908
    *p= av_mallocz(size);
938 909
}
939 910

  
940 911
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
941
        int mbWidth = (width+15)>>4;
942
        int mbHeight= (height+15)>>4;
943
        int i;
944

  
945
        c->stride= stride;
946
        c->qpStride= qpStride;
947

  
948
        reallocAlign((void **)&c->tempDst, 8, stride*24);
949
        reallocAlign((void **)&c->tempSrc, 8, stride*24);
950
        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
951
        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
952
        for(i=0; i<256; i++)
953
                c->yHistogram[i]= width*height/64*15/256;
954

  
955
        for(i=0; i<3; i++)
956
        {
957
                //Note: The +17*1024 is just there so I do not have to worry about reads/writes past the end.
958
                reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
959
                reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
960
        }
961

  
962
        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
963
        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
964
        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
965
        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
912
    int mbWidth = (width+15)>>4;
913
    int mbHeight= (height+15)>>4;
914
    int i;
915

  
916
    c->stride= stride;
917
    c->qpStride= qpStride;
918

  
919
    reallocAlign((void **)&c->tempDst, 8, stride*24);
920
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
921
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
922
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
923
    for(i=0; i<256; i++)
924
            c->yHistogram[i]= width*height/64*15/256;
925

  
926
    for(i=0; i<3; i++){
927
        //Note: The +17*1024 is just there so I do not have to worry about reads/writes past the end.
928
        reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
929
        reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
930
    }
931

  
932
    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
933
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
934
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
935
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
966 936
}
967 937

  
968 938
static const char * context_to_name(void * ptr) {
......
972 942
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
973 943

  
974 944
pp_context_t *pp_get_context(int width, int height, int cpuCaps){
975
        PPContext *c= av_malloc(sizeof(PPContext));
976
        int stride= (width+15)&(~15);    //assumed / will realloc if needed
977
        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
978

  
979
        memset(c, 0, sizeof(PPContext));
980
        c->av_class = &av_codec_context_class;
981
        c->cpuCaps= cpuCaps;
982
        if(cpuCaps&PP_FORMAT){
983
                c->hChromaSubSample= cpuCaps&0x3;
984
                c->vChromaSubSample= (cpuCaps>>4)&0x3;
985
        }else{
986
                c->hChromaSubSample= 1;
987
                c->vChromaSubSample= 1;
988
        }
989

  
990
        reallocBuffers(c, width, height, stride, qpStride);
991

  
992
        c->frameNum=-1;
993

  
994
        return c;
945
    PPContext *c= av_malloc(sizeof(PPContext));
946
    int stride= (width+15)&(~15);    //assumed / will realloc if needed
947
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
948

  
949
    memset(c, 0, sizeof(PPContext));
950
    c->av_class = &av_codec_context_class;
951
    c->cpuCaps= cpuCaps;
952
    if(cpuCaps&PP_FORMAT){
953
        c->hChromaSubSample= cpuCaps&0x3;
954
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
955
    }else{
956
        c->hChromaSubSample= 1;
957
        c->vChromaSubSample= 1;
958
    }
959

  
960
    reallocBuffers(c, width, height, stride, qpStride);
961

  
962
    c->frameNum=-1;
963

  
964
    return c;
995 965
}
996 966

  
997 967
void pp_free_context(void *vc){
998
        PPContext *c = (PPContext*)vc;
999
        int i;
968
    PPContext *c = (PPContext*)vc;
969
    int i;
1000 970

  
1001
        for(i=0; i<3; i++) av_free(c->tempBlured[i]);
1002
        for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
971
    for(i=0; i<3; i++) av_free(c->tempBlured[i]);
972
    for(i=0; i<3; i++) av_free(c->tempBluredPast[i]);
1003 973

  
1004
        av_free(c->tempBlocks);
1005
        av_free(c->yHistogram);
1006
        av_free(c->tempDst);
1007
        av_free(c->tempSrc);
1008
        av_free(c->deintTemp);
1009
        av_free(c->stdQPTable);
1010
        av_free(c->nonBQPTable);
1011
        av_free(c->forcedQPTable);
974
    av_free(c->tempBlocks);
975
    av_free(c->yHistogram);
976
    av_free(c->tempDst);
977
    av_free(c->tempSrc);
978
    av_free(c->deintTemp);
979
    av_free(c->stdQPTable);
980
    av_free(c->nonBQPTable);
981
    av_free(c->forcedQPTable);
1012 982

  
1013
        memset(c, 0, sizeof(PPContext));
983
    memset(c, 0, sizeof(PPContext));
1014 984

  
1015
        av_free(c);
985
    av_free(c);
1016 986
}
1017 987

  
1018 988
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
1019
                 uint8_t * dst[3], const int dstStride[3],
1020
                 int width, int height,
1021
                 const QP_STORE_T *QP_store,  int QPStride,
1022
                 pp_mode_t *vm,  void *vc, int pict_type)
989
                     uint8_t * dst[3], const int dstStride[3],
990
                     int width, int height,
991
                     const QP_STORE_T *QP_store,  int QPStride,
992
                     pp_mode_t *vm,  void *vc, int pict_type)
1023 993
{
1024
        int mbWidth = (width+15)>>4;
1025
        int mbHeight= (height+15)>>4;
1026
        PPMode *mode = (PPMode*)vm;
1027
        PPContext *c = (PPContext*)vc;
1028
        int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
1029
        int absQPStride = FFABS(QPStride);
1030

  
1031
        // c->stride and c->qpStride are always positive
1032
        if(c->stride < minStride || c->qpStride < absQPStride)
1033
                reallocBuffers(c, width, height,
1034
                                FFMAX(minStride, c->stride),
1035
                                FFMAX(c->qpStride, absQPStride));
1036

  
1037
        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
1038
        {
1039
                int i;
1040
                QP_store= c->forcedQPTable;
1041
                absQPStride = QPStride = 0;
1042
                if(mode->lumMode & FORCE_QUANT)
1043
                        for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1044
                else
1045
                        for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1046
        }
994
    int mbWidth = (width+15)>>4;
995
    int mbHeight= (height+15)>>4;
996
    PPMode *mode = (PPMode*)vm;
997
    PPContext *c = (PPContext*)vc;
998
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
999
    int absQPStride = FFABS(QPStride);
1000

  
1001
    // c->stride and c->qpStride are always positive
1002
    if(c->stride < minStride || c->qpStride < absQPStride)
1003
        reallocBuffers(c, width, height,
1004
                       FFMAX(minStride, c->stride),
1005
                       FFMAX(c->qpStride, absQPStride));
1006

  
1007
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
1008
        int i;
1009
        QP_store= c->forcedQPTable;
1010
        absQPStride = QPStride = 0;
1011
        if(mode->lumMode & FORCE_QUANT)
1012
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= mode->forcedQuant;
1013
        else
1014
            for(i=0; i<mbWidth; i++) c->forcedQPTable[i]= 1;
1015
    }
1047 1016

  
1048
        if(pict_type & PP_PICT_TYPE_QP2){
1049
                int i;
1050
                const int count= mbHeight * absQPStride;
1051
                for(i=0; i<(count>>2); i++){
1052
                        ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1053
                }
1054
                for(i<<=2; i<count; i++){
1055
                        c->stdQPTable[i] = QP_store[i]>>1;
1056
                }
1057
                QP_store= c->stdQPTable;
1058
                QPStride= absQPStride;
1017
    if(pict_type & PP_PICT_TYPE_QP2){
1018
        int i;
1019
        const int count= mbHeight * absQPStride;
1020
        for(i=0; i<(count>>2); i++){
1021
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
1059 1022
        }
1060

  
1061
if(0){
1062
int x,y;
1063
for(y=0; y<mbHeight; y++){
1064
        for(x=0; x<mbWidth; x++){
1023
        for(i<<=2; i<count; i++){
1024
            c->stdQPTable[i] = QP_store[i]>>1;
1025
        }
1026
        QP_store= c->stdQPTable;
1027
        QPStride= absQPStride;
1028
    }
1029

  
1030
    if(0){
1031
        int x,y;
1032
        for(y=0; y<mbHeight; y++){
1033
            for(x=0; x<mbWidth; x++){
1065 1034
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
1035
            }
1036
            av_log(c, AV_LOG_INFO, "\n");
1066 1037
        }
1067 1038
        av_log(c, AV_LOG_INFO, "\n");
1068
}
1069
        av_log(c, AV_LOG_INFO, "\n");
1070
}
1071

  
1072
        if((pict_type&7)!=3)
1073
        {
1074
                if (QPStride >= 0) {
1075
                        int i;
1076
                        const int count= mbHeight * QPStride;
1077
                        for(i=0; i<(count>>2); i++){
1078
                                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1079
                        }
1080
                        for(i<<=2; i<count; i++){
1081
                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1082
                        }
1083
                } else {
1084
                        int i,j;
1085
                        for(i=0; i<mbHeight; i++) {
1086
                                    for(j=0; j<absQPStride; j++) {
1087
                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1088
                                }
1089
                        }
1039
    }
1040

  
1041
    if((pict_type&7)!=3){
1042
        if (QPStride >= 0){
1043
            int i;
1044
            const int count= mbHeight * QPStride;
1045
            for(i=0; i<(count>>2); i++){
1046
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1047
            }
1048
            for(i<<=2; i<count; i++){
1049
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1050
            }
1051
        } else {
1052
            int i,j;
1053
            for(i=0; i<mbHeight; i++) {
1054
                for(j=0; j<absQPStride; j++) {
1055
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1090 1056
                }
1057
            }
1091 1058
        }
1059
    }
1092 1060

  
1093
        av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1094
               mode->lumMode, mode->chromMode);
1061
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1062
           mode->lumMode, mode->chromMode);
1095 1063

  
1096
        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1064
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1097 1065
                width, height, QP_store, QPStride, 0, mode, c);
1098 1066

  
1099
        width  = (width )>>c->hChromaSubSample;
1100
        height = (height)>>c->vChromaSubSample;
1101

  
1102
        if(mode->chromMode)
1103
        {
1104
                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1105
                        width, height, QP_store, QPStride, 1, mode, c);
1106
                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1107
                        width, height, QP_store, QPStride, 2, mode, c);
1108
        }
1109
        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
1110
        {
1111
                linecpy(dst[1], src[1], height, srcStride[1]);
1112
                linecpy(dst[2], src[2], height, srcStride[2]);
1113
        }
1114
        else
1115
        {
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff