Statistics
| Branch: | Revision:

ffmpeg / libavfilter / libmpcodecs / vf_filmdint.c @ e4852fb3

History | View | Annotate | Download (51.7 KB)

/*
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
18

    
19
#include <stdio.h>
20
#include <stdlib.h>
21
#include <string.h>
22
#include <sys/time.h>
23

    
24
#include "config.h"
25
#include "mp_msg.h"
26
#include "cpudetect.h"
27

    
28
#include "img_format.h"
29
#include "mp_image.h"
30
#include "vd.h"
31
#include "vf.h"
32
#include "cmmx.h"
33

    
34
#include "libvo/fastmemcpy.h"
35

    
36
/* Sizes vf_priv_s.planes, which holds 2*NUM_STORED sets of 4 plane pointers. */
#define NUM_STORED 4
37

    
38
/*
 * Field classification labels; this is the type of vf_priv_s.prev_type.
 * The names suggest "n-th field of a 3-field group", "n-th field of a
 * 2-field group" and "interlaced" (presumably pulldown related -- confirm
 * against the code that assigns prev_type).
 */
enum pu_field_type_t {
    PU_1ST_OF_3,
    PU_2ND_OF_3,
    PU_3RD_OF_3,
    PU_1ST_OF_2,
    PU_2ND_OF_2,
    PU_INTERLACED
};
46

    
47
/*
 * Four 16-bit per-block difference metrics.
 *
 * The field order and the 8-byte alignment matter: the MMX code paths load
 * and store the whole struct as one 64-bit register (word 0 = even,
 * word 1 = odd, word 2 = noise, word 3 = temp).
 */
struct metrics {
    /* This struct maps to a packed word 64-bit MMX register */
    unsigned short int even;
    unsigned short int odd;
    unsigned short int noise;
    unsigned short int temp;
} __attribute__ ((aligned (8)));
54

    
55
struct frame_stats {
56
    struct metrics tiny, low, high, bigger, twox, max;
57
    struct { unsigned int even, odd, noise, temp; } sad;
58
    unsigned short interlaced_high;
59
    unsigned short interlaced_low;
60
    unsigned short num_blocks;
61
};
62

    
63
struct vf_priv_s {
64
    unsigned long inframes;
65
    unsigned long outframes;
66
    enum pu_field_type_t prev_type;
67
    unsigned swapped, chroma_swapped;
68
    unsigned luma_only;
69
    unsigned verbose;
70
    unsigned fast;
71
    unsigned long w, h, cw, ch, stride, chroma_stride, nplanes;
72
    unsigned long sad_thres;
73
    unsigned long dint_thres;
74
    unsigned char *memory_allocated;
75
    unsigned char *planes[2*NUM_STORED][4];
76
    unsigned char **old_planes;
77
    unsigned long static_idx;
78
    unsigned long temp_idx;
79
    unsigned long crop_x, crop_y, crop_cx, crop_cy;
80
    unsigned long export_count, merge_count;
81
    unsigned long num_breaks;
82
    unsigned long num_copies;
83
    long in_inc, out_dec, iosync;
84
    long num_fields;
85
    long prev_fields;
86
    long notout;
87
    long mmx2;
88
    unsigned small_bytes[2];
89
    unsigned mmx_temp[2];
90
    struct frame_stats stats[2];
91
    struct metrics thres;
92
    char chflag;
93
    double diff_time, merge_time, decode_time, vo_time, filter_time;
94
};
95

    
96
#define PPZ { 2000, 2000, 0, 2000 }
97
#define PPR { 2000, 2000, 0, 2000 }
98
static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999};
99
static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999};
100

    
101
/*
 * Classic two-argument minimum / maximum.  Each argument may be evaluated
 * twice, so do not pass expressions with side effects.
 */
#ifndef MIN
#define        MIN(a,b) (((a)<(b))?(a):(b))
#endif
#ifndef MAX
#define        MAX(a,b) (((a)>(b))?(a):(b))
#endif
107

    
108
/*
 * Plain-MMX / 3DNow! building blocks, expanded into inline-asm strings.
 * Operands are AT&T syntax (source first, destination last).
 */

/* T = |X - Y| per unsigned byte; Y is clobbered. */
#define PDIFFUB(X,Y,T) "movq "    #X "," #T "\n\t" \
                       "psubusb " #Y "," #T "\n\t" \
                       "psubusb " #X "," #Y "\n\t" \
                       "paddusb " #Y "," #T "\n\t"

/* Y = |X - Y| per unsigned byte; T is clobbered. */
#define PDIFFUBT(X,Y,T) "movq "    #X "," #T "\n\t" \
                        "psubusb " #Y "," #T "\n\t" \
                        "psubusb " #X "," #Y "\n\t" \
                        "paddusb " #T "," #Y "\n\t"

/*
 * X = sum of the eight bytes of X (left in the low word); T is clobbered
 * and Z is used as the unpack partner, so it must hold zero.
 */
#define PSUMBW(X,T,Z)        "movq " #X "," #T "\n\t" \
                        "punpcklbw " #Z "," #X "\n\t" \
                        "punpckhbw " #Z "," #T "\n\t" \
                        "paddw " #T "," #X "\n\t" \
                        "movq " #X "," #T "\n\t" \
                        "psllq $32, " #T "\n\t" \
                        "paddw " #T "," #X "\n\t" \
                        "movq " #X "," #T "\n\t" \
                        "psllq $16, " #T "\n\t" \
                        "paddw " #T "," #X "\n\t" \
                        "psrlq $48, " #X "\n\t"

/* Y = sum of absolute byte differences of X and Y; T clobbered, Z zero. */
#define PSADBW(X,Y,T,Z)        PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)

/* Y = max(X, Y) per unsigned byte / unsigned word (saturating sub + add). */
#define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
#define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
/* Y = min(X, Y) per unsigned byte; T is clobbered. */
#define PMINUBT(X,Y,T)        "movq " #Y "," #T "\n\t" \
                        "psubusb " #X "," #T "\n\t" \
                        "psubusb " #T "," #Y "\n\t"
/* Y = byte-wise average of X and Y, using the 3DNow! pavgusb instruction. */
#define PAVGB(X,Y)        "pavgusb " #X "," #Y "\n\t"
138

    
139
static inline void
140
get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines,
141
              struct metrics *m)
142
{
143
    a -= as;
144
    b -= bs;
145
    do {
146
        cmmx_t old_po = *(cmmx_t*)(a      );
147
        cmmx_t     po = *(cmmx_t*)(b      );
148
        cmmx_t      e = *(cmmx_t*)(b +   bs);
149
        cmmx_t  old_o = *(cmmx_t*)(a + 2*as);
150
        cmmx_t      o = *(cmmx_t*)(b + 2*bs);
151
        cmmx_t     ne = *(cmmx_t*)(b + 3*bs);
152
        cmmx_t old_no = *(cmmx_t*)(a + 4*as);
153
        cmmx_t     no = *(cmmx_t*)(b + 4*bs);
154

    
155
        cmmx_t   qup_old_odd = p31avgb(old_o, old_po);
156
        cmmx_t       qup_odd = p31avgb(    o,     po);
157
        cmmx_t qdown_old_odd = p31avgb(old_o, old_no);
158
        cmmx_t     qdown_odd = p31avgb(    o,     no);
159

    
160
        cmmx_t   qup_even = p31avgb(ne, e);
161
        cmmx_t qdown_even = p31avgb(e, ne);
162

    
163
        cmmx_t    temp_up_diff = pdiffub(qdown_even, qup_old_odd);
164
        cmmx_t   noise_up_diff = pdiffub(qdown_even, qup_odd);
165
        cmmx_t  temp_down_diff = pdiffub(qup_even, qdown_old_odd);
166
        cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd);
167

    
168
        cmmx_t odd_diff = pdiffub(o, old_o);
169
        m->odd  += psumbw(odd_diff);
170
        m->even += psadbw(e, *(cmmx_t*)(a+as));
171

    
172
        temp_up_diff  = pminub(temp_up_diff, temp_down_diff);
173
        temp_up_diff  = pminub(temp_up_diff, odd_diff);
174
        m->temp  += psumbw(temp_up_diff);
175
        noise_up_diff = pminub(noise_up_diff, odd_diff);
176
        noise_up_diff = pminub(noise_up_diff, noise_down_diff);
177

    
178
        m->noise += psumbw(noise_up_diff);
179
        a += 2*as;
180
        b += 2*bs;
181
    } while (--lines);
182
}
183

    
184
static inline void
185
get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
186
                   int lines, struct metrics *m)
187
{
188
    a -= as;
189
    b -= bs;
190
    do {
191
        cmmx_t old_po = (*(cmmx_t*)(a       ) >> 1) & ~SIGN_BITS;
192
        cmmx_t     po = (*(cmmx_t*)(b       ) >> 1) & ~SIGN_BITS;
193
        cmmx_t  old_e = (*(cmmx_t*)(a +   as) >> 1) & ~SIGN_BITS;
194
        cmmx_t      e = (*(cmmx_t*)(b +   bs) >> 1) & ~SIGN_BITS;
195
        cmmx_t  old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS;
196
        cmmx_t      o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS;
197
        cmmx_t     ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS;
198
        cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS;
199
        cmmx_t     no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS;
200

    
201
        cmmx_t   qup_old_odd = p31avgb_s(old_o, old_po);
202
        cmmx_t       qup_odd = p31avgb_s(    o,     po);
203
        cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no);
204
        cmmx_t     qdown_odd = p31avgb_s(    o,     no);
205

    
206
        cmmx_t   qup_even = p31avgb_s(ne, e);
207
        cmmx_t qdown_even = p31avgb_s(e, ne);
208

    
209
        cmmx_t    temp_up_diff = pdiffub_s(qdown_even, qup_old_odd);
210
        cmmx_t   noise_up_diff = pdiffub_s(qdown_even, qup_odd);
211
        cmmx_t  temp_down_diff = pdiffub_s(qup_even, qdown_old_odd);
212
        cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd);
213

    
214
        cmmx_t odd_diff = pdiffub_s(o, old_o);
215
        m->odd  += psumbw_s(odd_diff) << 1;
216
        m->even += psadbw_s(e, old_e) << 1;
217

    
218
        temp_up_diff  = pminub_s(temp_up_diff, temp_down_diff);
219
        temp_up_diff  = pminub_s(temp_up_diff, odd_diff);
220
        m->temp      += psumbw_s(temp_up_diff) << 1;
221
        noise_up_diff = pminub_s(noise_up_diff, odd_diff);
222
        noise_up_diff = pminub_s(noise_up_diff, noise_down_diff);
223

    
224
        m->noise += psumbw_s(noise_up_diff) << 1;
225
        a += 2*as;
226
        b += 2*bs;
227
    } while (--lines);
228
}
229

    
230
static inline void
231
get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
232
                   int lines, struct metrics *m)
233
{
234
    a -= as;
235
    b -= bs;
236
    do {
237
        cmmx_t old_po = (*(cmmx_t*)(a       )>>1) & ~SIGN_BITS;
238
        cmmx_t     po = (*(cmmx_t*)(b       )>>1) & ~SIGN_BITS;
239
        cmmx_t  old_e = (*(cmmx_t*)(a +   as)>>1) & ~SIGN_BITS;
240
        cmmx_t      e = (*(cmmx_t*)(b +   bs)>>1) & ~SIGN_BITS;
241
        cmmx_t  old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS;
242
        cmmx_t      o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS;
243
        cmmx_t     ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS;
244

    
245
        cmmx_t  down_even = p31avgb_s(e, ne);
246
        cmmx_t     up_odd = p31avgb_s(o, po);
247
        cmmx_t up_old_odd = p31avgb_s(old_o, old_po);
248

    
249
        cmmx_t   odd_diff = pdiffub_s(o, old_o);
250
        cmmx_t  temp_diff = pdiffub_s(down_even, up_old_odd);
251
        cmmx_t noise_diff = pdiffub_s(down_even, up_odd);
252

    
253
        m->even += psadbw_s(e, old_e) << 1;
254
        m->odd  += psumbw_s(odd_diff) << 1;
255

    
256
        temp_diff  = pminub_s(temp_diff, odd_diff);
257
        noise_diff = pminub_s(noise_diff, odd_diff);
258

    
259
        m->noise += psumbw_s(noise_diff) << 1;
260
        m->temp  += psumbw_s(temp_diff) << 1;
261
        a += 2*as;
262
        b += 2*bs;
263
    } while (--lines);
264

    
265
}
266

    
267
static inline void
268
get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s)
269
{
270
    unsigned two_e = m->even  + MAX(m->even , p->thres.even );
271
    unsigned two_o = m->odd   + MAX(m->odd  , p->thres.odd  );
272
    unsigned two_n = m->noise + MAX(m->noise, p->thres.noise);
273
    unsigned two_t = m->temp  + MAX(m->temp , p->thres.temp );
274

    
275
    unsigned e_big   = m->even  >= (m->odd   + two_o + 1)/2;
276
    unsigned o_big   = m->odd   >= (m->even  + two_e + 1)/2;
277
    unsigned n_big   = m->noise >= (m->temp  + two_t + 1)/2;
278
    unsigned t_big   = m->temp  >= (m->noise + two_n + 1)/2;
279

    
280
    unsigned e2x     = m->even  >= two_o;
281
    unsigned o2x     = m->odd   >= two_e;
282
    unsigned n2x     = m->noise >= two_t;
283
    unsigned t2x     = m->temp  >= two_n;
284

    
285
    unsigned ntiny_e = m->even  > p->thres.even ;
286
    unsigned ntiny_o = m->odd   > p->thres.odd  ;
287
    unsigned ntiny_n = m->noise > p->thres.noise;
288
    unsigned ntiny_t = m->temp  > p->thres.temp ;
289

    
290
    unsigned nlow_e  = m->even  > 2*p->thres.even ;
291
    unsigned nlow_o  = m->odd   > 2*p->thres.odd  ;
292
    unsigned nlow_n  = m->noise > 2*p->thres.noise;
293
    unsigned nlow_t  = m->temp  > 2*p->thres.temp ;
294

    
295
    unsigned high_e  = m->even  > 4*p->thres.even ;
296
    unsigned high_o  = m->odd   > 4*p->thres.odd  ;
297
    unsigned high_n  = m->noise > 4*p->thres.noise;
298
    unsigned high_t  = m->temp  > 4*p->thres.temp ;
299

    
300
    unsigned low_il  = !n_big && !t_big && ntiny_n && ntiny_t;
301
    unsigned high_il = !n_big && !t_big && nlow_n  && nlow_t;
302

    
303
    if (low_il | high_il) {
304
        s->interlaced_low  += low_il;
305
        s->interlaced_high += high_il;
306
    } else {
307
        s->tiny.even  += ntiny_e;
308
        s->tiny.odd   += ntiny_o;
309
        s->tiny.noise += ntiny_n;
310
        s->tiny.temp  += ntiny_t;
311

    
312
        s->low .even  += nlow_e ;
313
        s->low .odd   += nlow_o ;
314
        s->low .noise += nlow_n ;
315
        s->low .temp  += nlow_t ;
316

    
317
        s->high.even  += high_e ;
318
        s->high.odd   += high_o ;
319
        s->high.noise += high_n ;
320
        s->high.temp  += high_t ;
321

    
322
        if (m->even  >=        p->sad_thres) s->sad.even  += m->even ;
323
        if (m->odd   >=        p->sad_thres) s->sad.odd   += m->odd  ;
324
        if (m->noise >=        p->sad_thres) s->sad.noise += m->noise;
325
        if (m->temp  >=        p->sad_thres) s->sad.temp  += m->temp ;
326
    }
327
    s->num_blocks++;
328
    s->max.even  = MAX(s->max.even , m->even );
329
    s->max.odd   = MAX(s->max.odd  , m->odd  );
330
    s->max.noise = MAX(s->max.noise, m->noise);
331
    s->max.temp  = MAX(s->max.temp , m->temp );
332

    
333
    s->bigger.even  += e_big  ;
334
    s->bigger.odd   += o_big  ;
335
    s->bigger.noise += n_big  ;
336
    s->bigger.temp  += t_big  ;
337

    
338
    s->twox.even  += e2x    ;
339
    s->twox.odd   += o2x    ;
340
    s->twox.noise += n2x    ;
341
    s->twox.temp  += t2x    ;
342

    
343
}
344

    
345
static inline struct metrics
346
block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs,
347
                int lines, struct vf_priv_s *p, struct frame_stats *s)
348
{
349
    struct metrics tm;
350
    tm.even = tm.odd = tm.noise = tm.temp = 0;
351
    get_metrics_c(a, b, as, bs, lines, &tm);
352
    if (sizeof(cmmx_t) < 8)
353
        get_metrics_c(a+4, b+4, as, bs, lines, &tm);
354
    get_block_stats(&tm, p, s);
355
    return tm;
356
}
357

    
358
static inline struct metrics
359
block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs,
360
                int lines, struct vf_priv_s *p, struct frame_stats *s)
361
{
362
    struct metrics tm;
363
    tm.even = tm.odd = tm.noise = tm.temp = 0;
364
    get_metrics_fast_c(a, b, as, bs, lines, &tm);
365
    if (sizeof(cmmx_t) < 8)
366
        get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm);
367
    get_block_stats(&tm, p, s);
368
    return tm;
369
}
370

    
371
static inline struct metrics
372
block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs,
373
                int lines, struct vf_priv_s *p, struct frame_stats *s)
374
{
375
    struct metrics tm;
376
    tm.even = tm.odd = tm.noise = tm.temp = 0;
377
    get_metrics_faster_c(a, b, as, bs, lines, &tm);
378
    if (sizeof(cmmx_t) < 8)
379
        get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm);
380
    get_block_stats(&tm, p, s);
381
    return tm;
382
}
383

    
384
/* True when X and Y agree in all four members even/odd/temp/noise; works on any struct with those fields. */
#define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)
385

    
386
/*
 * Shared inline-asm body of block_metrics_3dnow() and block_metrics_mmx2().
 * It expands against whichever PSADBW/PSUMBW/PMINUBT/PAVGB definitions are
 * active at the point of use.
 *
 * Expects in scope: a, b (advanced by the loop), as, bs, lines (counted
 * down to zero; must be >= 1) and a 64-bit constant named "ones".
 *
 * On exit mm7 holds the four saturating 16-bit sums in struct metrics order
 * (word 0 even, word 1 odd, word 2 noise, word 3 temp) and mm6 is zero.
 * MMX state is left live: the caller must read mm7 and issue emms.
 */
#define BLOCK_METRICS_TEMPLATE() \
    __asm__ volatile("pxor %mm7, %mm7\n\t"   /* The result is collected in mm7 */ \
                 "pxor %mm6, %mm6\n\t"   /* Temp to stay at 0 */             \
        );                                                                     \
    a -= as;                                                                     \
    b -= bs;                                                                     \
    do {                                                                     \
        __asm__ volatile(                                                     \
            "movq (%0,%2), %%mm0\n\t"                                             \
            "movq (%1,%3), %%mm1\n\t"   /* mm1 = even */                     \
            PSADBW(%%mm1, %%mm0, %%mm4, %%mm6)                                     \
            "paddusw %%mm0, %%mm7\n\t"  /* even diff */                             \
            "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */                     \
            "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */                             \
            "movq (%0), %%mm3\n\t"                                             \
            "psubusb %4, %%mm3\n\t"                                             \
            PAVGB(%%mm0, %%mm3)                                                     \
            PAVGB(%%mm0, %%mm3)    /* mm3 = qup old odd */                     \
            "movq %%mm0, %%mm5\n\t"                                             \
            PSADBW(%%mm2, %%mm0, %%mm4, %%mm6)                                     \
            "psllq $16, %%mm0\n\t"                                             \
            "paddusw %%mm0, %%mm7\n\t"                                             \
            "movq (%1), %%mm4\n\t"                                             \
            "lea (%0,%2,2), %0\n\t"                                             \
            "lea (%1,%3,2), %1\n\t"                                             \
            "psubusb %4, %%mm4\n\t"                                             \
            PAVGB(%%mm2, %%mm4)                                                     \
            PAVGB(%%mm2, %%mm4)    /* mm4 = qup odd */                             \
            PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */             \
            "movq (%1,%3), %%mm5\n\t"                                             \
            "psubusb %4, %%mm5\n\t"                                             \
            PAVGB(%%mm1, %%mm5)                                                     \
            PAVGB(%%mm5, %%mm1)    /* mm1 = qdown even */                     \
            PAVGB((%1,%3), %%mm5)  /* mm5 = qup next even */                     \
            PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */             \
            PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */             \
            PMINUBT(%%mm2, %%mm3, %%mm0)  /* limit temp to odd diff */             \
            PMINUBT(%%mm2, %%mm4, %%mm0)  /* limit noise to odd diff */             \
            "movq (%1,%3,2), %%mm2\n\t"                                             \
            "psubusb %4, %%mm2\n\t"                                             \
            PAVGB((%1), %%mm2)                                                     \
            PAVGB((%1), %%mm2)    /* mm2 = qdown odd */                             \
            "movq (%0,%2,2), %%mm1\n\t"                                             \
            "psubusb %4, %%mm1\n\t"                                             \
            PAVGB((%0), %%mm1)                                                     \
            PAVGB((%0), %%mm1)  /* mm1 = qdown old odd */                     \
            PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */             \
            PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */             \
            PMINUBT(%%mm4, %%mm2, %%mm0)  /* current */                             \
            PMINUBT(%%mm3, %%mm1, %%mm0)  /* old */                             \
            PSUMBW(%%mm2, %%mm0, %%mm6)                                             \
            PSUMBW(%%mm1, %%mm0, %%mm6)                                             \
            "psllq $32, %%mm2\n\t"                                             \
            "psllq $48, %%mm1\n\t"                                             \
            "paddusw %%mm2, %%mm7\n\t"                                             \
            "paddusw %%mm1, %%mm7\n\t"                                             \
            : "=r" (a), "=r" (b)                                             \
            : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
            );                                                                     \
    } while (--lines);
446

    
447
static inline struct metrics
448
block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs,
449
                    int lines, struct vf_priv_s *p, struct frame_stats *s)
450
{
451
    struct metrics tm;
452
#if !HAVE_AMD3DNOW
453
    mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");
454
#else
455
    static const unsigned long long ones = 0x0101010101010101ull;
456

    
457
    BLOCK_METRICS_TEMPLATE();
458
    __asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm));
459
    get_block_stats(&tm, p, s);
460
#endif
461
    return tm;
462
}
463

    
464
/*
 * Switch the helper macros to their MMX2 single-instruction forms so that
 * the next expansion of BLOCK_METRICS_TEMPLATE uses them.  The T (temporary)
 * parameters are kept for call compatibility but are no longer used.
 */
#undef PSUMBW
#undef PSADBW
#undef PMAXUB
#undef PMINUBT
#undef PAVGB

#define PSUMBW(X,T,Z)        "psadbw " #Z "," #X "\n\t"
#define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
#define PMAXUB(X,Y)        "pmaxub " #X "," #Y "\n\t"
#define PMINUBT(X,Y,T)        "pminub " #X "," #Y "\n\t"
#define PAVGB(X,Y)        "pavgb "  #X "," #Y "\n\t"
475

    
476
static inline struct metrics
477
block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs,
478
                   int lines, struct vf_priv_s *p, struct frame_stats *s)
479
{
480
    struct metrics tm;
481
#if !HAVE_MMX
482
    mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");
483
#else
484
    static const unsigned long long ones = 0x0101010101010101ull;
485
    x86_reg interlaced;
486
    x86_reg prefetch_line = (((long)a>>3) & 7) + 10;
487
#ifdef DEBUG
488
    struct frame_stats ts = *s;
489
#endif
490
    __asm__ volatile("prefetcht0 (%0,%2)\n\t"
491
                 "prefetcht0 (%1,%3)\n\t" :
492
                 : "r" (a), "r" (b),
493
                 "r" (prefetch_line * as), "r" (prefetch_line * bs));
494

    
495
    BLOCK_METRICS_TEMPLATE();
496

    
497
    s->num_blocks++;
498
    __asm__ volatile(
499
        "movq %3, %%mm0\n\t"
500
        "movq %%mm7, %%mm1\n\t"
501
        "psubusw %%mm0, %%mm1\n\t"
502
        "movq %%mm1, %%mm2\n\t"
503
        "paddusw %%mm0, %%mm2\n\t"
504
        "paddusw %%mm7, %%mm2\n\t"
505
        "pshufw $0xb1, %%mm2, %%mm3\n\t"
506
        "pavgw %%mm7, %%mm2\n\t"
507
        "pshufw $0xb1, %%mm2, %%mm2\n\t"
508
        "psubusw %%mm7, %%mm2\n\t"
509
        "pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */
510
        "psubusw %%mm7, %%mm3\n\t"
511
        "pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */
512
        "movq %1, %%mm4\n\t"
513
        "movq %2, %%mm5\n\t"
514
        "psubw %%mm2, %%mm4\n\t"
515
        "psubw %%mm3, %%mm5\n\t"
516
        "movq %%mm4, %1\n\t"
517
        "movq %%mm5, %2\n\t"
518
        "pxor %%mm4, %%mm4\n\t"
519
        "pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */
520
        "psubusw %%mm0, %%mm1\n\t"
521
        "pxor %%mm5, %%mm5\n\t"
522
        "pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */
523
        "psubusw %%mm0, %%mm1\n\t"
524
        "psubusw %%mm0, %%mm1\n\t"
525
        "pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */
526
        "pshufw $0xb1, %%mm2, %%mm0\n\t"
527
        "por %%mm2, %%mm0\n\t"     /* 1 if not close */
528
        "punpckhdq %%mm0, %%mm0\n\t"
529
        "movq %%mm4, %%mm2\n\t"      /* tttt */
530
        "punpckhdq %%mm5, %%mm2\n\t" /* ttll */
531
        "por %%mm2, %%mm0\n\t"
532
        "pcmpeqd %%mm6, %%mm0\n\t" /* close && big */
533
        "psrlq $16, %%mm0\n\t"
534
        "psrlw $15, %%mm0\n\t"
535
        "movd %%mm0, %0\n\t"
536
        : "=r" (interlaced), "=m" (s->bigger), "=m" (s->twox)
537
        : "m" (p->thres)
538
        );
539

    
540
    if (interlaced) {
541
        s->interlaced_high += interlaced >> 16;
542
        s->interlaced_low += interlaced;
543
    } else {
544
        __asm__ volatile(
545
            "pcmpeqw %%mm0, %%mm0\n\t" /* -1 */
546
            "psubw         %%mm0, %%mm4\n\t"
547
            "psubw         %%mm0, %%mm5\n\t"
548
            "psubw         %%mm0, %%mm1\n\t"
549
            "paddw %0, %%mm4\n\t"
550
            "paddw %1, %%mm5\n\t"
551
            "paddw %2, %%mm1\n\t"
552
            "movq %%mm4, %0\n\t"
553
            "movq %%mm5, %1\n\t"
554
            "movq %%mm1, %2\n\t"
555
            : "=m" (s->tiny), "=m" (s->low), "=m" (s->high)
556
            );
557

    
558
        __asm__ volatile(
559
            "pshufw $0, %2, %%mm0\n\t"
560
            "psubusw %%mm7, %%mm0\n\t"
561
            "pcmpeqw %%mm6, %%mm0\n\t"   /* 0 if below sad_thres */
562
            "pand %%mm7, %%mm0\n\t"
563
            "movq %%mm0, %%mm1\n\t"
564
            "punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */
565
            "punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */
566
            "paddd %0, %%mm0\n\t"
567
            "paddd %1, %%mm1\n\t"
568
            "movq %%mm0, %0\n\t"
569
            "movq %%mm1, %1\n\t"
570
            : "=m" (s->sad.even), "=m" (s->sad.noise)
571
            : "m" (p->sad_thres)
572
            );
573
    }
574

    
575
    __asm__ volatile(
576
        "movq %%mm7, (%1)\n\t"
577
        PMAXUW((%0), %%mm7)
578
        "movq %%mm7, (%0)\n\t"
579
        "emms"
580
        : : "r" (&s->max), "r" (&tm), "X" (s->max)
581
        : "memory"
582
        );
583
#ifdef DEBUG
584
    if (1) {
585
        struct metrics cm;
586
        a -= 7*as;
587
        b -= 7*bs;
588
        cm = block_metrics_c(a, b, as, bs, 4, p, &ts);
589
        if (!MEQ(tm, cm))
590
            mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");
591
        if (s) {
592
#           define CHECK(X) if (!MEQ(s->X, ts.X)) \
593
                mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n");
594
            CHECK(tiny);
595
            CHECK(low);
596
            CHECK(high);
597
            CHECK(sad);
598
            CHECK(max);
599
        }
600
    }
601
#endif
602
#endif
603
    return tm;
604
}
605

    
606
static inline int
607
dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos,
608
                    long cos, int ds, int ss, int w, int t)
609
{
610
#if !HAVE_MMX
611
    mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");
612
    return 0;
613
#else
614
    unsigned long len = (w+7) >> 3;
615
    int ret;
616
    __asm__ volatile (
617
        "pxor %%mm6, %%mm6 \n\t"       /* deinterlaced pixel counter */
618
        "movd %0, %%mm7 \n\t"
619
        "punpcklbw %%mm7, %%mm7 \n\t"
620
        "punpcklwd %%mm7, %%mm7 \n\t"
621
        "punpckldq %%mm7, %%mm7 \n\t"  /* mm7 = threshold */
622
        : /* no output */
623
        : "rm" (t)
624
        );
625
    do {
626
        __asm__ volatile (
627
            "movq (%0), %%mm0\n\t"
628
            "movq (%0,%3,2), %%mm1\n\t"
629
            "movq %%mm0, (%2)\n\t"
630
            "pmaxub %%mm1, %%mm0\n\t"
631
            "pavgb (%0), %%mm1\n\t"
632
            "psubusb %%mm1, %%mm0\n\t"
633
            "paddusb %%mm7, %%mm0\n\t"  /* mm0 = max-avg+thr */
634
            "movq (%0,%1), %%mm2\n\t"
635
            "movq (%0,%5), %%mm3\n\t"
636
            "movq %%mm2, %%mm4\n\t"
637
            PDIFFUBT(%%mm1, %%mm2, %%mm5)
638
            PDIFFUBT(%%mm1, %%mm3, %%mm5)
639
            "pminub %%mm2, %%mm3\n\t"
640
            "pcmpeqb %%mm3, %%mm2\n\t"  /* b = min */
641
            "pand %%mm2, %%mm4\n\t"
642
            "pandn (%0,%5), %%mm2\n\t"
643
            "por %%mm4, %%mm2\n\t"
644
            "pminub %%mm0, %%mm3\n\t"
645
            "pcmpeqb %%mm0, %%mm3\n\t"  /* set to 1s if >= threshold */
646
            "psubb %%mm3, %%mm6\n\t"    /* count pixels above thr. */
647
            "pand %%mm3, %%mm1 \n\t"
648
            "pandn %%mm2, %%mm3 \n\t"
649
            "por %%mm3, %%mm1 \n\t"     /* avg if >= threshold */
650
            "movq %%mm1, (%2,%4) \n\t"
651
            : /* no output */
652
            : "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos)
653
            );
654
        a += 8;
655
        dst += 8;
656
    } while (--len);
657

    
658
    __asm__ volatile ("pxor %%mm7, %%mm7 \n\t"
659
                  "psadbw %%mm6, %%mm7 \n\t"
660
                  "movd %%mm7, %0 \n\t"
661
                  "emms \n\t"
662
                  : "=r" (ret)
663
        );
664
    return ret;
665
#endif
666
}
667

    
668
static inline int
669
dint_copy_line(unsigned char *dst, unsigned char *a, long bos,
670
               long cos, int ds, int ss, int w, int t)
671
{
672
    unsigned long len = ((unsigned long)w+sizeof(cmmx_t)-1) / sizeof(cmmx_t);
673
    cmmx_t dint_count = 0;
674
    cmmx_t thr;
675
    t |= t <<  8;
676
    thr = t | (t << 16);
677
    if (sizeof(cmmx_t) > 4)
678
        thr |= thr << (sizeof(cmmx_t)*4);
679
    do {
680
        cmmx_t e = *(cmmx_t*)a;
681
        cmmx_t ne = *(cmmx_t*)(a+2*ss);
682
        cmmx_t o = *(cmmx_t*)(a+bos);
683
        cmmx_t oo = *(cmmx_t*)(a+cos);
684
        cmmx_t maxe = pmaxub(e, ne);
685
        cmmx_t avge = pavgb(e, ne);
686
        cmmx_t max_diff = maxe - avge + thr; /* 0<=max-avg<128, thr<128 */
687
        cmmx_t diffo  = pdiffub(avge, o);
688
        cmmx_t diffoo = pdiffub(avge, oo);
689
        cmmx_t diffcmp = pcmpgtub(diffo, diffoo);
690
        cmmx_t bo = ((oo ^ o) & diffcmp) ^ o;
691
        cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo;
692
        cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo);
693
        cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo;
694
        dint_count += above_thr & ONE_BYTES;
695
        *(cmmx_t*)(dst) = e;
696
        *(cmmx_t*)(dst+ds) = bo_or_avg;
697
        a += sizeof(cmmx_t);
698
        dst += sizeof(cmmx_t);
699
    } while (--len);
700
    return psumbw(dint_count);
701
}
702

    
703
/*
 * Build one output plane from up to three source planes, two rows at a time.
 *
 * d         - destination plane, stride ds
 * a         - plane supplying the rows copied verbatim
 * b, c      - candidate planes for the in-between rows (same stride ss as a)
 * w, h      - plane width in bytes and height in rows
 * threshold - >= 128 means plain weave (rows copied from a and b);
 *             smaller values enable adaptive de-interlacing
 * field     - non-zero: the first output row is taken from b and the
 *             pattern starts one row lower
 * mmx2      - 1 selects dint_copy_line_mmx2(), anything else the C version
 *
 * Returns the number of bytes the line routines replaced by an average.
 *
 * Planes with no rows left to copy return early; previously h == 0, or
 * h == 1 with field set, copied a row that lies outside the plane.
 */
static int
dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
                unsigned char *c, unsigned long w, unsigned long h,
                unsigned long ds, unsigned long ss, unsigned long threshold,
                long field, long mmx2)
{
    unsigned long ret = 0;
    long bos = b - a;
    long cos = c - a;

    if (h == 0)
        return 0;
    if (field) {
        fast_memcpy(d, b, w);
        h--;
        d += ds;
        a += ss;
        if (h == 0)
            return 0;
    }
    /* From here on b and c are addressed one row below a. */
    bos += ss;
    cos += ss;
    while (h > 2) {
        if (threshold >= 128) {
            fast_memcpy(d, a, w);
            fast_memcpy(d+ds, a+bos, w);
        } else if (mmx2 == 1) {
            ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
        } else
            ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
        h -= 2;
        d += 2*ds;
        a += 2*ss;
    }
    /* Last one or two rows are always copied without interpolation. */
    fast_memcpy(d, a, w);
    if (h == 2)
        fast_memcpy(d+ds, a+bos, w);
    return ret;
}
737

    
738
static void
739
copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,
740
                  unsigned char **old, unsigned char **new, unsigned long show)
741
{
742
    unsigned long threshold = 256;
743
    unsigned long field = p->swapped;
744
    unsigned long dint_pixels = 0;
745
    unsigned char **other = old;
746
    if (show >= 12 || !(show & 3))
747
        show >>= 2, other = new, new = old;
748
    if (show <= 2) {  /* Single field: de-interlace */
749
        threshold = p->dint_thres;
750
        field ^= show & 1;
751
        old = new;
752
    } else if (show == 3)
753
        old = new;
754
    else
755
        field ^= 1;
756
    dint_pixels +=dint_copy_plane(dmpi->planes[0], old[0], new[0],
757
                                  other[0], p->w, p->h, dmpi->stride[0],
758
                                  p->stride, threshold, field, p->mmx2);
759
    if (dmpi->flags & MP_IMGFLAG_PLANAR) {
760
        if (p->luma_only)
761
            old = new, other = new;
762
        else
763
            threshold = threshold/2 + 1;
764
        field ^= p->chroma_swapped;
765
        dint_copy_plane(dmpi->planes[1], old[1], new[1],
766
                        other[1], p->cw, p->ch,        dmpi->stride[1],
767
                        p->chroma_stride, threshold, field, p->mmx2);
768
        dint_copy_plane(dmpi->planes[2], old[2], new[2],
769
                        other[2], p->cw, p->ch, dmpi->stride[2],
770
                        p->chroma_stride, threshold, field, p->mmx2);
771
    }
772
    if (dint_pixels > 0 && p->verbose)
773
        mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);
774
}
775

    
776
static void diff_planes(struct vf_priv_s *p, struct frame_stats *s,
777
                        unsigned char *of, unsigned char *nf,
778
                        int w, int h, int os, int ns, int swapped)
779
{
780
    int i, y;
781
    int align = -(long)nf & 7;
782
    of += align;
783
    nf += align;
784
    w -= align;
785
    if (swapped)
786
        of -= os, nf -= ns;
787
    i = (h*3 >> 7) & ~1;
788
    of += i*os + 8;
789
    nf += i*ns + 8;
790
    h -= i;
791
    w -= 16;
792

    
793
    memset(s, 0, sizeof(*s));
794

    
795
    for (y = (h-8) >> 3; y; y--) {
796
        if (p->mmx2 == 1) {
797
            for (i = 0; i < w; i += 8)
798
                block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);
799
        } else if (p->mmx2 == 2) {
800
            for (i = 0; i < w; i += 8)
801
                block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);
802
        } else if (p->fast > 3) {
803
            for (i = 0; i < w; i += 8)
804
                block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);
805
        } else if (p->fast > 1) {
806
            for (i = 0; i < w; i += 8)
807
                block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);
808
        } else {
809
            for (i = 0; i < w; i += 8)
810
                block_metrics_c(of+i, nf+i, os, ns, 4, p, s);
811
        }
812
        of += 8*os;
813
        nf += 8*ns;
814
    }
815
}
816

    
817
/* Expand X (a struct with even, odd, noise and temp members) into those
 * four members as a comma-separated list, for use as printf-style
 * arguments.  X is evaluated four times. */
#define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp
818

    
819
static void diff_fields(struct vf_priv_s *p, struct frame_stats *s,
820
                        unsigned char **old, unsigned char **new)
821
{
822
    diff_planes(p, s, old[0], new[0], p->w, p->h,
823
                p->stride, p->stride, p->swapped);
824
    s->sad.even  = (s->sad.even  * 16ul) / s->num_blocks;
825
    s->sad.odd   = (s->sad.odd   * 16ul) / s->num_blocks;
826
    s->sad.noise = (s->sad.noise * 16ul) / s->num_blocks;
827
    s->sad.temp  = (s->sad.temp  * 16ul) / s->num_blocks;
828
    if (p->verbose)
829
        mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d - %d, "
830
               "t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "
831
               "2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",
832
               p->inframes, p->chflag, METRICS(s->max), s->num_blocks,
833
               METRICS(s->tiny), METRICS(s->low), METRICS(s->high),
834
               METRICS(s->bigger), METRICS(s->twox), METRICS(s->sad),
835
               s->interlaced_low, s->interlaced_high,
836
               p->iosync / (double) p->in_inc);
837
}
838

    
839
static const char *parse_args(struct vf_priv_s *p, const char *args)
840
{
841
    args--;
842
    while (args && *++args &&
843
           (sscanf(args, "io=%lu:%lu", &p->out_dec, &p->in_inc) == 2 ||
844
            sscanf(args, "diff_thres=%hu", &p->thres.even ) == 1 ||
845
            sscanf(args, "comb_thres=%hu", &p->thres.noise) == 1 ||
846
            sscanf(args, "sad_thres=%lu",  &p->sad_thres  ) == 1 ||
847
            sscanf(args, "dint_thres=%lu", &p->dint_thres ) == 1 ||
848
            sscanf(args, "fast=%u",        &p->fast       ) == 1 ||
849
            sscanf(args, "mmx2=%lu",       &p->mmx2       ) == 1 ||
850
            sscanf(args, "luma_only=%u",   &p->luma_only  ) == 1 ||
851
            sscanf(args, "verbose=%u",     &p->verbose    ) == 1 ||
852
            sscanf(args, "crop=%lu:%lu:%lu:%lu", &p->w,
853
                   &p->h, &p->crop_x, &p->crop_y) == 4))
854
        args = strchr(args, '/');
855
    return args;
856
}
857

    
858
/*
 * Greatest common divisor of x and y (Euclid's algorithm).
 * gcd(v, 0) and gcd(0, v) yield v, so gcd(0, 0) is 0.
 */
static unsigned long gcd(unsigned long x, unsigned long y)
{
    unsigned long lo = x > y ? y : x;
    unsigned long hi = x > y ? x : y;

    while (lo != 0) {
        const unsigned long rem = hi % lo;
        hi = lo;
        lo = rem;
    }
    return hi;
}
871

    
872
/*
 * One-time setup of the field storage, done when the first image
 * arrives.
 *
 * Derives the chroma crop offsets, strides and chroma size from mpi,
 * allocates one buffer holding NUM_STORED luma planes (followed by
 * NUM_STORED pairs of chroma planes for planar formats) and fills
 * p->planes: entries 0..NUM_STORED-1 point to the start of each stored
 * plane, entries NUM_STORED..2*NUM_STORED-1 to the same plane at the
 * crop offset.  Finally the io ratio is brought to lowest terms with
 * out_dec scaled by 4.
 *
 * If the allocation fails an error is logged and the function returns
 * with p->planes[0][0] still NULL.
 */
static void init(struct vf_priv_s *p, mp_image_t *mpi)
{
    unsigned long i;
    unsigned long plane_size, chroma_plane_size;
    unsigned char *plane;
    unsigned long chroma_off, luma_off;

    p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
    p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
    if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
        /* round the luma stride up to a multiple of 16 */
        p->stride = (mpi->w + 15) & ~15;
        p->chroma_stride = p->stride >> mpi->chroma_x_shift;
    } else {
        p->stride = mpi->width;
        p->chroma_stride = mpi->chroma_width;
    }
    p->cw = p->w >> mpi->chroma_x_shift;
    p->ch = p->h >> mpi->chroma_y_shift;
    p->nplanes = 1;
    p->static_idx = 0;
    p->temp_idx = 0;
    p->old_planes = p->planes[0];
    plane_size = mpi->h * p->stride;
    chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
        mpi->chroma_height * p->chroma_stride : 0;
    p->memory_allocated =
        malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +
               8*p->chroma_stride + 4096);
    if (!p->memory_allocated) {
        mp_msg(MSGT_VFILTER, MSGL_FATAL, "filmdint: out of memory\n");
        return;
    }
    /* align to page boundary */
    plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);
    memset(plane, 0, NUM_STORED * plane_size);
    luma_off   = p->crop_x  + p->crop_y  * p->stride;
    chroma_off = p->crop_cx + p->crop_cy * p->chroma_stride;
    for (i = 0; i != NUM_STORED; i++, plane += plane_size) {
        p->planes[i][0] = plane;
        p->planes[NUM_STORED + i][0] = plane + luma_off;
    }
    if (mpi->flags & MP_IMGFLAG_PLANAR) {
        p->nplanes = 3;
        /* 0x80 is the neutral chroma value */
        memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);
        for (i = 0; i != NUM_STORED; i++) {
            p->planes[i][1] = plane;
            p->planes[NUM_STORED + i][1] = plane + chroma_off;
            plane += chroma_plane_size;
            p->planes[i][2] = plane;
            p->planes[NUM_STORED + i][2] = plane + chroma_off;
            plane += chroma_plane_size;
        }
    }
    p->out_dec <<= 2;
    i = gcd(p->in_inc, p->out_dec);
    /* gcd() is 0 only when both values are 0; nothing to reduce then */
    if (i) {
        p->in_inc /= i;
        p->out_dec /= i;
    }
    p->iosync = 0;
    p->num_fields = 3;
}
927

    
928
/* Current wall-clock time from gettimeofday(), in seconds as a double. */
static inline double get_time(void)
{
    struct timeval now;
    double seconds;

    gettimeofday(&now, 0);
    seconds = now.tv_sec;
    seconds += now.tv_usec * 1e-6;
    return seconds;
}
934

    
935
/*
 * Direct rendering hook: hand one of the filter's own stored buffers to
 * the decoder so that put_image() can use the frame without copying.
 *
 * Static images are not handled.  TEMP images and non-readable IPB
 * images use the upper half of the stored buffers (indexed through
 * p->temp_idx), everything else the lower half (p->static_idx).  If the
 * chosen buffer is the one still holding the previous frame
 * (p->old_planes), that frame is first copied to another buffer so it
 * stays available.  mpi->priv is set to the crop-offset plane pointers
 * of the chosen buffer.
 */
static void get_image(struct vf_instance *vf, mp_image_t *mpi)
{
    struct vf_priv_s *p = vf->priv;
    /* Both are assigned before use on every call, so they need not (and
     * should not) be function-static state shared by all instances. */
    unsigned char **planes;
    unsigned char planes_idx;

    if (mpi->type == MP_IMGTYPE_STATIC) return;

    if (!p->planes[0][0]) init(p, mpi);
    /* still no buffers: leave mpi untouched (no direct rendering) */
    if (!p->planes[0][0]) return;

    if (mpi->type == MP_IMGTYPE_TEMP ||
        (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))
        planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));
    else
        planes_idx = ++p->static_idx % (NUM_STORED/2);
    planes = p->planes[planes_idx];
    mpi->priv = p->planes[NUM_STORED + planes_idx];
    if (mpi->priv == p->old_planes) {
        /* the decoder is about to overwrite the previous frame: save it */
        unsigned char **old_planes =
            p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];
        my_memcpy_pic(old_planes[0], p->old_planes[0],
                      p->w, p->h, p->stride, p->stride);
        if (mpi->flags & MP_IMGFLAG_PLANAR) {
            my_memcpy_pic(old_planes[1], p->old_planes[1],
                          p->cw, p->ch, p->chroma_stride, p->chroma_stride);
            my_memcpy_pic(old_planes[2], p->old_planes[2],
                          p->cw, p->ch, p->chroma_stride, p->chroma_stride);
        }
        p->old_planes = old_planes;
        p->num_copies++;
    }
    mpi->planes[0] = planes[0];
    mpi->stride[0] = p->stride;
    if (mpi->flags & MP_IMGFLAG_PLANAR) {
        mpi->planes[1] = planes[1];
        mpi->planes[2] = planes[2];
        mpi->stride[1] = mpi->stride[2] = p->chroma_stride;
    }
    mpi->width = p->stride;

    mpi->flags |= MP_IMGFLAG_DIRECT;
    mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;
}
977

    
978
/*
 * Compare x and y with a tolerance.
 *
 * The tolerance is (x + y + err) >> e.  The result is the sum of two
 * three-way comparisons of x - y, one against the full tolerance and one
 * against half of it, so it ranges from -2 (x clearly smaller) through
 * 0 (about equal) to +2 (x clearly larger).
 */
static inline long
cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e)
{
    const long delta = x-y;
    const long full = ((x+y+err) >> e);
    const long half = full >> 1;
    long score = 0;

    score += (delta > full);
    score -= (delta < -full);
    score += (delta > half);
    score -= (delta < -half);
    return score;
}
987

    
988
static unsigned long
989
find_breaks(struct vf_priv_s *p, struct frame_stats *s)
990
{
991
    struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
992
    long notfilm = 5*p->in_inc - p->out_dec;
993
    unsigned long n = s->num_blocks >> 8;
994
    unsigned long sad_comb_cmp = cmpe(s->sad.temp, s->sad.noise, 512, 1);
995
    unsigned long ret = 8;
996

    
997
    if (cmpe(s->sad.temp, s->sad.even, 512, 1) > 0)
998
        mp_msg(MSGT_VFILTER, MSGL_WARN,
999
               "@@@@@@@@ Bottom-first field??? @@@@@@@@\n");
1000
    if (s->sad.temp > 1000 && s->sad.noise > 1000)
1001
        return 3;
1002
    if (s->interlaced_high >= 2*n && s->sad.temp > 256 && s->sad.noise > 256)
1003
        return 3;
1004
    if (s->high.noise > s->num_blocks/4 && s->sad.noise > 10000 &&
1005
        s->sad.noise > 2*s->sad.even && s->sad.noise > 2*ps->sad.odd) {
1006
        // Mid-frame scene change
1007
        if (s->tiny.temp + s->interlaced_low  < n   ||
1008
            s->low.temp  + s->interlaced_high < n/4 ||
1009
            s->high.temp + s->interlaced_high < n/8 ||
1010
            s->sad.temp < 160)
1011
            return 1;
1012
        return 3;
1013
    }
1014
    if (s->high.temp > s->num_blocks/4 && s->sad.temp > 10000 &&
1015
        s->sad.temp > 2*ps->sad.odd && s->sad.temp > 2*ps->sad.even) {
1016
        // Start frame scene change
1017
        if (s->tiny.noise + s->interlaced_low  < n   ||
1018
            s->low.noise  + s->interlaced_high < n/4 ||
1019
            s->high.noise + s->interlaced_high < n/8 ||
1020
            s->sad.noise < 160)
1021
            return 2;
1022
        return 3;
1023
    }
1024
    if (sad_comb_cmp == 2)
1025
        return 2;
1026
    if (sad_comb_cmp == -2)
1027
        return 1;
1028

    
1029
    if (s->tiny.odd > 3*MAX(n,s->tiny.even) + s->interlaced_low)
1030
        return 1;
1031
    if (s->tiny.even > 3*MAX(n,s->tiny.odd)+s->interlaced_low &&
1032
        (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
1033
        return 4;
1034

    
1035
    if (s->sad.noise < 64 && s->sad.temp < 64 &&
1036
        s->low.noise <= n/2 && s->high.noise <= n/4 &&
1037
        s->low.temp  <= n/2 && s->high.temp  <= n/4)
1038
        goto still;
1039

    
1040
    if (s->tiny.temp > 3*MAX(n,s->tiny.noise) + s->interlaced_low)
1041
        return 2;
1042
    if (s->tiny.noise > 3*MAX(n,s->tiny.temp) + s->interlaced_low)
1043
        return 1;
1044

    
1045
    if (s->low.odd > 3*MAX(n/4,s->low.even) + s->interlaced_high)
1046
        return 1;
1047
    if (s->low.even > 3*MAX(n/4,s->low.odd)+s->interlaced_high &&
1048
        s->sad.even > 2*s->sad.odd &&
1049
        (!sad_comb_cmp || (s->low.noise <= n/4 && s->low.temp <= n/4)))
1050
        return 4;
1051

    
1052
    if (s->low.temp > 3*MAX(n/4,s->low.noise) + s->interlaced_high)
1053
        return 2;
1054
    if (s->low.noise > 3*MAX(n/4,s->low.temp) + s->interlaced_high)
1055
        return 1;
1056

    
1057
    if (sad_comb_cmp == 1 && s->sad.noise < 64)
1058
        return 2;
1059
    if (sad_comb_cmp == -1 && s->sad.temp < 64)
1060
        return 1;
1061

    
1062
    if (s->tiny.odd <= n || (s->tiny.noise <= n/2 && s->tiny.temp <= n/2)) {
1063
        if (s->interlaced_low <= n) {
1064
            if (p->num_fields == 1)
1065
                goto still;
1066
            if (s->tiny.even <= n || ps->tiny.noise <= n/2)
1067
                /* Still frame */
1068
                goto still;
1069
            if (s->bigger.even >= 2*MAX(n,s->bigger.odd) + s->interlaced_low)
1070
                return 4;
1071
            if (s->low.even >= 2*n + s->interlaced_low)
1072
                return 4;
1073
            goto still;
1074
        }
1075
    }
1076
    if (s->low.odd <= n/4) {
1077
        if (s->interlaced_high <= n/4) {
1078
            if (p->num_fields == 1)
1079
                goto still;
1080
            if (s->low.even <= n/4)
1081
                /* Still frame */
1082
                goto still;
1083
            if (s->bigger.even >= 2*MAX(n/4,s->bigger.odd)+s->interlaced_high)
1084
                return 4;
1085
            if (s->low.even >= n/2 + s->interlaced_high)
1086
                return 4;
1087
            goto still;
1088
        }
1089
    }
1090
    if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_low)
1091
        return 2;
1092
    if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_low)
1093
        return 1;
1094
    if (s->bigger.temp > 2*MAX(n,s->bigger.noise) + s->interlaced_high)
1095
        return 2;
1096
    if (s->bigger.noise > 2*MAX(n,s->bigger.temp) + s->interlaced_high)
1097
        return 1;
1098
    if (s->twox.temp > 2*MAX(n,s->twox.noise) + s->interlaced_high)
1099
        return 2;
1100
    if (s->twox.noise > 2*MAX(n,s->twox.temp) + s->interlaced_high)
1101
        return 1;
1102
    if (s->bigger.even > 2*MAX(n,s->bigger.odd) + s->interlaced_low &&
1103
        s->bigger.temp < n && s->bigger.noise < n)
1104
        return 4;
1105
    if (s->interlaced_low > MIN(2*n, s->tiny.odd))
1106
        return 3;
1107
    ret = 8 + (1 << (s->sad.temp > s->sad.noise));
1108
  still:
1109
    if (p->num_fields == 1 && p->prev_fields == 3 && notfilm >= 0 &&
1110
        (s->tiny.temp <= s->tiny.noise || s->sad.temp < s->sad.noise+16))
1111
        return 1;
1112
    if (p->notout < p->num_fields && p->iosync > 2*p->in_inc && notfilm < 0)
1113
        notfilm = 0;
1114
    if (p->num_fields < 2 ||
1115
        (p->num_fields == 2 && p->prev_fields == 2 && notfilm < 0))
1116
        return ret;
1117
    if (!notfilm && (p->prev_fields&~1) == 2) {
1118
        if (p->prev_fields + p->num_fields == 5) {
1119
            if (s->tiny.noise <= s->tiny.temp ||
1120
                s->low.noise == 0 || s->low.noise < s->low.temp ||
1121
                s->sad.noise < s->sad.temp+16)
1122
                return 2;
1123
        }
1124
        if (p->prev_fields + p->num_fields == 4) {
1125
            if (s->tiny.temp <= s->tiny.noise ||
1126
                s->low.temp == 0 || s->low.temp < s->low.noise ||
1127
                s->sad.temp < s->sad.noise+16)
1128
                return 1;
1129
        }
1130
    }
1131
    if (p->num_fields > 2 &&
1132
        ps->sad.noise > s->sad.noise && ps->sad.noise > s->sad.temp)
1133
        return 4;
1134
    return 2 >> (s->sad.noise > s->sad.temp);
1135
}
1136

    
1137
/* Map a small integer to one display character for the verbose log:
 * 0 gives ' ', other values up to 9 give '0' + X, larger values give
 * 'a' + (X - 10).  X is evaluated more than once. */
#define ITOC(X) (!(X) ? ' ' : (X) + ((X)>9 ? 'a'-10 : '0'))
1138

    
1139
/*
 * Process one input frame.
 *
 * The frame is taken from the direct-rendering buffer (mpi->priv) or
 * copied into a stored buffer, its statistics against the previous frame
 * are computed, find_breaks() classifies the transition, and from that
 * result and the input/output rate bookkeeping (p->iosync, p->notout,
 * p->num_fields) a field mask show_fields is derived: bits 0-1 select
 * fields of the current frame, bits 2-3 fields of the previous frame.
 * If the mask is non-zero an output frame is produced, either by
 * exporting a stored frame unchanged or by merging / de-interlacing
 * fields with copy_merge_fields().
 *
 * Returns the result of vf_next_put_image() when a frame was output and
 * 0 otherwise (including when no output image could be obtained).
 */
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
{
    mp_image_t *dmpi = NULL;
    struct vf_priv_s *p = vf->priv;
    unsigned char **planes, **old_planes;
    struct frame_stats *s  = &p->stats[p->inframes & 1];
    struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
    int swapped = 0;
    const int flags = mpi->fields;
    int breaks, prev;
    int show_fields = 0;
    int dropped_fields = 0;
    double start_time, diff_time;
    char prev_chflag = p->chflag;
    int keep_rate;

    if (!p->planes[0][0]) init(p, mpi);
    /* still no buffers: nothing can be stored or output */
    if (!p->planes[0][0]) return 0;

    old_planes = p->old_planes;

    if ((mpi->flags & MP_IMGFLAG_DIRECT) && mpi->priv) {
        /* the decoder rendered straight into one of our buffers */
        planes = mpi->priv;
        mpi->priv = 0;
    } else {
        planes = p->planes[2 + (++p->temp_idx & 1)];
        my_memcpy_pic(planes[0],
                      mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0],
                      p->w, p->h, p->stride, mpi->stride[0]);
        if (mpi->flags & MP_IMGFLAG_PLANAR) {
            my_memcpy_pic(planes[1],
                          mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1],
                          p->cw, p->ch, p->chroma_stride, mpi->stride[1]);
            my_memcpy_pic(planes[2],
                          mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2],
                          p->cw, p->ch, p->chroma_stride, mpi->stride[2]);
            p->num_copies++;
        }
    }

    p->old_planes = planes;
    /* chflag: ';' no field order info, '|' repeat-first-field,
     * ':' top field first, '.' bottom field first */
    p->chflag = ';';
    if (flags & MP_IMGFIELD_ORDERED) {
        swapped = !(flags & MP_IMGFIELD_TOP_FIRST);
        p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' :
                     flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.');
    }
    p->swapped = swapped;

    start_time = get_time();
    if (p->chflag == '|') {
        *s = ppzs;
        p->iosync += p->in_inc;
    } else if ((p->fast & 1) && prev_chflag == '|')
        *s = pprs;
    else
        diff_fields(p, s, old_planes, planes);
    diff_time = get_time();
    p->diff_time += diff_time - start_time;
    breaks = p->inframes ? find_breaks(p, s) : 2;
    p->inframes++;
    keep_rate = 4*p->in_inc == p->out_dec;

    switch (breaks) {
      case 0:
      case 8:
      case 9:
      case 10:
        if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc)
            break;
        if (p->notout < p->num_fields)
            dropped_fields = -2;
        /* fall through */
      case 4:
        if (keep_rate || p->iosync >= -2*p->in_inc)
            show_fields = (4<<p->num_fields)-1;
        break;
      case 3:
        if (keep_rate)
            show_fields = 2;
        else if (p->iosync > 0) {
            if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) {
                show_fields = 4; /* prev odd only */
                if (p->num_fields > 1)
                    show_fields |= 8; /* + prev even */
            } else {
                show_fields = 2; /* even only */
                if (p->notout >= p->num_fields)
                    dropped_fields += p->num_fields;
            }
        }
        break;
      case 2:
        if (p->iosync <= -3*p->in_inc) {
            if (p->notout >= p->num_fields)
                dropped_fields = p->num_fields;
            break;
        }
        if (p->num_fields == 1) {
            int prevbreak = ps->sad.noise >= 128;
            if (p->iosync < 4*p->in_inc) {
                show_fields = 3;
                dropped_fields = prevbreak;
            } else {
                show_fields = 4 | (!prevbreak << 3);
                if (p->notout < 1 + p->prev_fields)
                    dropped_fields = -!prevbreak;
            }
            break;
        }
        /* fall through */
      default:
        if (keep_rate)
            show_fields = 3 << (breaks & 1);
        else if (p->notout >= p->num_fields &&
            p->iosync >= (breaks == 1 ? -p->in_inc :
                          p->in_inc << (p->num_fields == 1))) {
            show_fields = (1 << (2 + p->num_fields)) - (1<<breaks);
        } else {
            if (p->notout >= p->num_fields)
                dropped_fields += p->num_fields + 2 - breaks;
            if (breaks == 1) {
                if (p->iosync >= 4*p->in_inc)
                    show_fields = 6;
            } else if (p->iosync > -3*p->in_inc)
                show_fields = 3;  /* odd+even */
        }
        break;
    }

    show_fields &= 15;
    prev = p->prev_fields;
    if (breaks < 8) {
        if (p->num_fields == 1)
            breaks &= ~4;
        if (breaks)
            p->num_breaks++;
        if (breaks == 3)
            p->prev_fields = p->num_fields = 1;
        else if (breaks) {
            p->prev_fields = p->num_fields + (breaks==1) - (breaks==4);
            p->num_fields = breaks - (breaks == 4) + (p->chflag == '|');
        } else
            p->num_fields += 2;
    } else
        p->num_fields += 2;

    p->iosync += 4 * p->in_inc;
    if (p->chflag == '|')
        p->iosync += p->in_inc;

    if (show_fields) {
        p->iosync -= p->out_dec;
        p->notout = !(show_fields & 1) + !(show_fields & 3);
        if (((show_fields &  3) ==  3 &&
             (s->low.noise + s->interlaced_low < (s->num_blocks>>8) ||
              s->sad.noise < 160)) ||
            ((show_fields & 12) == 12 &&
             (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) ||
              ps->sad.noise < 160))) {
            /* both fields of one stored frame are shown and its comb
             * metrics are low: export that frame without copying */
            p->export_count++;
            dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT,
                                MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE,
                                p->w, p->h);
            if (!dmpi)
                return 0;
            if ((show_fields & 3) != 3) planes = old_planes;
            dmpi->planes[0] = planes[0];
            dmpi->stride[0] = p->stride;
            dmpi->width = mpi->width;
            if (mpi->flags & MP_IMGFLAG_PLANAR) {
                dmpi->planes[1] = planes[1];
                dmpi->planes[2] = planes[2];
                dmpi->stride[1] = p->chroma_stride;
                dmpi->stride[2] = p->chroma_stride;
            }
        } else {
            p->merge_count++;
            dmpi = vf_get_image(vf->next, mpi->imgfmt,
                                MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
                                p->w, p->h);
            if (!dmpi)
                return 0;
            copy_merge_fields(p, dmpi, old_planes, planes, show_fields);
        }
        p->outframes++;
    } else
        p->notout += 2;

    if (p->verbose)
        mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n",
               p->inframes, p->outframes,
               breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ',
               ITOC(show_fields),
               p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 &&
               breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ?
               " ######## bad telecine ########" : "",
               dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields),
               !show_fields || (show_fields & (show_fields-1)) ?
               "" : " @@@@@@@@@@@@@@@@@");

    p->merge_time += get_time() - diff_time;
    return show_fields ? vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE) : 0;
}
1336

    
1337
static int query_format(struct vf_instance *vf, unsigned int fmt)
1338
{
1339
    /* FIXME - support more formats */
1340
    switch (fmt) {
1341
      case IMGFMT_YV12:
1342
      case IMGFMT_IYUV:
1343
      case IMGFMT_I420:
1344
      case IMGFMT_411P:
1345
      case IMGFMT_422P:
1346
      case IMGFMT_444P:
1347
        return vf_next_query_format(vf, fmt);
1348
    }
1349
    return 0;
1350
}
1351

    
1352
static int config(struct vf_instance *vf,
1353
                  int width, int height, int d_width, int d_height,
1354
                  unsigned int flags, unsigned int outfmt)
1355
{
1356
    unsigned long cxm = 0;
1357
    unsigned long cym = 0;
1358
    struct vf_priv_s *p = vf->priv;
1359
    // rounding:
1360
    if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){
1361
        switch(outfmt){
1362
          case IMGFMT_444P:
1363
          case IMGFMT_Y800:
1364
          case IMGFMT_Y8:
1365
            break;
1366
          case IMGFMT_YVU9:
1367
          case IMGFMT_IF09:
1368
            cym = 3;
1369
          case IMGFMT_411P:
1370
            cxm = 3;
1371
            break;
1372
          case IMGFMT_YV12:
1373
          case IMGFMT_I420:
1374
          case IMGFMT_IYUV:
1375
            cym = 1;
1376
          default:
1377
            cxm = 1;
1378
        }
1379
    }
1380
    p->chroma_swapped = !!(p->crop_y & (cym+1));
1381
    if (p->w) p->w += p->crop_x & cxm;
1382
    if (p->h) p->h += p->crop_y & cym;
1383
    p->crop_x &= ~cxm;
1384
    p->crop_y &= ~cym;
1385
    if (!p->w || p->w > width ) p->w = width;
1386
    if (!p->h || p->h > height) p->h = height;
1387
    if (p->crop_x + p->w > width ) p->crop_x = 0;
1388
    if (p->crop_y + p->h > height) p->crop_y = 0;
1389

    
1390
    if(!opt_screen_size_x && !opt_screen_size_y){
1391
        d_width = d_width * p->w/width;
1392
        d_height = d_height * p->h/height;
1393
    }
1394
    return vf_next_config(vf, p->w, p->h, d_width, d_height, flags, outfmt);
1395
}
1396

    
1397
static void uninit(struct vf_instance *vf)
1398
{
1399
    struct vf_priv_s *p = vf->priv;
1400
    mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, "
1401
           "export: %lu, merge: %lu, copy: %lu\n", p->diff_time, p->merge_time,
1402
           p->export_count, p->merge_count, p->num_copies);
1403
    free(p->memory_allocated);
1404
    free(p);
1405
}
1406

    
1407
/*
 * Create a filter instance: allocate the private data, install the
 * callbacks, set the default options and parse the user's suboptions.
 *
 * Returns 1 on success and 0 on failure (out of memory, unknown
 * suboption, or an unusable io ratio).
 */
static int vf_open(vf_instance_t *vf, char *args)
{
    struct vf_priv_s *p = calloc(1, sizeof(struct vf_priv_s));

    if (!p) {
        mp_msg(MSGT_VFILTER, MSGL_FATAL, "filmdint: out of memory\n");
        return 0;
    }
    vf->get_image = get_image;
    vf->put_image = put_image;
    vf->config = config;
    vf->query_format = query_format;
    vf->uninit = uninit;
    vf->default_reqs = VFCAP_ACCEPT_STRIDE;
    vf->priv = p;
    p->out_dec = 5;
    p->in_inc = 4;
    p->thres.noise = 128;
    p->thres.even  = 128;
    p->sad_thres = 64;
    p->dint_thres = 4;
    p->luma_only = 0;
    p->fast = 3;
    p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
    if (args) {
        const char *args_remain = parse_args(p, args);
        if (args_remain) {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "filmdint: unknown suboption: %s\n", args_remain);
            return 0;
        }
        /* in_inc is used as a divisor later on */
        if (!p->in_inc) {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "filmdint: the io ratio must not be zero\n");
            return 0;
        }
        if (p->out_dec < p->in_inc) {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "filmdint: increasing the frame rate is not supported\n");
            return 0;
        }
    }
    /* only 0 (C), 1 (MMX2) and 2 (3DNow!) are valid, and only when the
     * corresponding code was compiled in */
    if (p->mmx2 > 2)
        p->mmx2 = 0;
#if !HAVE_MMX
    p->mmx2 = 0;
#endif
#if !HAVE_AMD3DNOW
    p->mmx2 &= 1;
#endif
    p->thres.odd  = p->thres.even;
    p->thres.temp = p->thres.noise;
    p->diff_time = 0;
    p->merge_time = 0;
    return 1;
}
1453

    
1454
const vf_info_t vf_info_filmdint = {
1455
    "Advanced inverse telecine filer",
1456
    "filmdint",
1457
    "Zoltan Hidvegi",
1458
    "",
1459
    vf_open,
1460
    NULL
1461
};