ffmpeg / libavfilter / libmpcodecs / vf_filmdint.c @ e4852fb3
History  View  Annotate  Download (51.7 KB)
1 
/*


2 
* This file is part of MPlayer.

3 
*

4 
* MPlayer is free software; you can redistribute it and/or modify

5 
* it under the terms of the GNU General Public License as published by

6 
* the Free Software Foundation; either version 2 of the License, or

7 
* (at your option) any later version.

8 
*

9 
* MPlayer is distributed in the hope that it will be useful,

10 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

11 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

12 
* GNU General Public License for more details.

13 
*

14 
* You should have received a copy of the GNU General Public License along

15 
* with MPlayer; if not, write to the Free Software Foundation, Inc.,

16 
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

17 
*/

18  
19 
#include <stdio.h> 
20 
#include <stdlib.h> 
21 
#include <string.h> 
22 
#include <sys/time.h> 
23  
24 
#include "config.h" 
25 
#include "mp_msg.h" 
26 
#include "cpudetect.h" 
27  
28 
#include "img_format.h" 
29 
#include "mp_image.h" 
30 
#include "vd.h" 
31 
#include "vf.h" 
32 
#include "cmmx.h" 
33  
34 
#include "libvo/fastmemcpy.h" 
35  
36 
/* Number of frame buffers carved out of the allocation made by init().
 * vf_priv_s.planes[] has 2*NUM_STORED entries: the first NUM_STORED point at
 * the start of each buffer, the second NUM_STORED at the same buffers offset
 * by the crop origin. */
#define NUM_STORED 4
37  
38 
/*
 * Field classification remembered between frames (see vf_priv_s.prev_type).
 * NOTE(review): the code that assigns and consumes these values is outside
 * this part of the file; judging by the names they describe the position of
 * a field inside a 3-field or 2-field group, or a truly interlaced field --
 * confirm against the users.
 */
enum pu_field_type_t {
    PU_1ST_OF_3,
    PU_2ND_OF_3,
    PU_3RD_OF_3,
    PU_1ST_OF_2,
    PU_2ND_OF_2,
    PU_INTERLACED
};
46  
47 
/*
 * Four 16-bit difference sums for one block.
 *
 * This struct maps to a packed word 64bit MMX register, so the member order
 * (even, odd, noise, temp from the lowest word upwards) and the 8-byte
 * alignment must not change: the MMX code stores a whole register into it.
 *
 *   even  - difference between the even lines of the old and the new frame
 *   odd   - difference between the odd lines of the old and the new frame
 *   noise - difference between interpolated even and odd lines of the new
 *           frame, limited to the odd difference
 *   temp  - difference between interpolated even lines of the new frame and
 *           odd lines of the old frame, limited to the odd difference
 */
struct metrics {
    unsigned short int even;
    unsigned short int odd;
    unsigned short int noise;
    unsigned short int temp;
} __attribute__ ((aligned (8)));

54  
55 
struct frame_stats {

56 
struct metrics tiny, low, high, bigger, twox, max;

57 
struct { unsigned int even, odd, noise, temp; } sad; 
58 
unsigned short interlaced_high; 
59 
unsigned short interlaced_low; 
60 
unsigned short num_blocks; 
61 
}; 
62  
63 
struct vf_priv_s {

64 
unsigned long inframes; 
65 
unsigned long outframes; 
66 
enum pu_field_type_t prev_type;

67 
unsigned swapped, chroma_swapped;

68 
unsigned luma_only;

69 
unsigned verbose;

70 
unsigned fast;

71 
unsigned long w, h, cw, ch, stride, chroma_stride, nplanes; 
72 
unsigned long sad_thres; 
73 
unsigned long dint_thres; 
74 
unsigned char *memory_allocated; 
75 
unsigned char *planes[2*NUM_STORED][4]; 
76 
unsigned char **old_planes; 
77 
unsigned long static_idx; 
78 
unsigned long temp_idx; 
79 
unsigned long crop_x, crop_y, crop_cx, crop_cy; 
80 
unsigned long export_count, merge_count; 
81 
unsigned long num_breaks; 
82 
unsigned long num_copies; 
83 
long in_inc, out_dec, iosync;

84 
long num_fields;

85 
long prev_fields;

86 
long notout;

87 
long mmx2;

88 
unsigned small_bytes[2]; 
89 
unsigned mmx_temp[2]; 
90 
struct frame_stats stats[2]; 
91 
struct metrics thres;

92 
char chflag;

93 
double diff_time, merge_time, decode_time, vo_time, filter_time;

94 
}; 
95  
96 
#define PPZ { 2000, 2000, 0, 2000 } 
97 
#define PPR { 2000, 2000, 0, 2000 } 
98 
static const struct frame_stats ppzs = {PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,PPZ,0,0,9999}; 
99 
static const struct frame_stats pprs = {PPR,PPR,PPR,PPR,PPR,PPR,PPR,0,0,9999}; 
100  
101 
/* Classic min/max helpers.  Both arguments are evaluated more than once, so
 * do not pass expressions with side effects. */
#ifndef MIN
#define MIN(a,b) (((a)<(b))?(a):(b))
#endif
#ifndef MAX
#define MAX(a,b) (((a)>(b))?(a):(b))
#endif

107  
108 
/*
 * Building blocks for the inline assembly below.  Every macro expands to a
 * string literal of AT&T-syntax instructions; X, Y, T and Z are register (or
 * memory) operands pasted in textually.  This first set only uses plain MMX
 * instructions plus 3DNow!'s pavgusb; it is replaced by an MMX2 set before
 * block_metrics_mmx2() is defined.
 */

/* T = |X - Y| per unsigned byte (Y is clobbered) */
#define PDIFFUB(X,Y,T) "movq " #X "," #T "\n\t" \
                       "psubusb " #Y "," #T "\n\t" \
                       "psubusb " #X "," #Y "\n\t" \
                       "paddusb " #Y "," #T "\n\t"

/* Y = |X - Y| per unsigned byte, using T as scratch */
#define PDIFFUBT(X,Y,T) "movq " #X "," #T "\n\t" \
                        "psubusb " #Y "," #T "\n\t" \
                        "psubusb " #X "," #Y "\n\t" \
                        "paddusb " #T "," #Y "\n\t"

/* X = sum of the eight bytes of X, left in the low word; T is scratch and Z
 * must hold zero */
#define PSUMBW(X,T,Z) "movq " #X "," #T "\n\t" \
                      "punpcklbw " #Z "," #X "\n\t" \
                      "punpckhbw " #Z "," #T "\n\t" \
                      "paddw " #T "," #X "\n\t" \
                      "movq " #X "," #T "\n\t" \
                      "psllq $32, " #T "\n\t" \
                      "paddw " #T "," #X "\n\t" \
                      "movq " #X "," #T "\n\t" \
                      "psllq $16, " #T "\n\t" \
                      "paddw " #T "," #X "\n\t" \
                      "psrlq $48, " #X "\n\t"

/* Y = sum of absolute byte differences of X and Y */
#define PSADBW(X,Y,T,Z) PDIFFUBT(X,Y,T) PSUMBW(Y,T,Z)

/* Y = max(X, Y) per unsigned byte / unsigned word */
#define PMAXUB(X,Y) "psubusb " #X "," #Y "\n\tpaddusb " #X "," #Y "\n\t"
#define PMAXUW(X,Y) "psubusw " #X "," #Y "\n\tpaddusw " #X "," #Y "\n\t"
/* Y = min(X, Y) per unsigned byte, using T as scratch */
#define PMINUBT(X,Y,T) "movq " #Y "," #T "\n\t" \
                       "psubusb " #X "," #T "\n\t" \
                       "psubusb " #T "," #Y "\n\t"
/* Y = average of X and Y per unsigned byte (3DNow! instruction) */
#define PAVGB(X,Y) "pavgusb " #X "," #Y "\n\t"
138  
139 
static inline void 
140 
get_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, int lines, 
141 
struct metrics *m)

142 
{ 
143 
a = as; 
144 
b = bs; 
145 
do {

146 
cmmx_t old_po = *(cmmx_t*)(a ); 
147 
cmmx_t po = *(cmmx_t*)(b ); 
148 
cmmx_t e = *(cmmx_t*)(b + bs); 
149 
cmmx_t old_o = *(cmmx_t*)(a + 2*as);

150 
cmmx_t o = *(cmmx_t*)(b + 2*bs);

151 
cmmx_t ne = *(cmmx_t*)(b + 3*bs);

152 
cmmx_t old_no = *(cmmx_t*)(a + 4*as);

153 
cmmx_t no = *(cmmx_t*)(b + 4*bs);

154  
155 
cmmx_t qup_old_odd = p31avgb(old_o, old_po); 
156 
cmmx_t qup_odd = p31avgb( o, po); 
157 
cmmx_t qdown_old_odd = p31avgb(old_o, old_no); 
158 
cmmx_t qdown_odd = p31avgb( o, no); 
159  
160 
cmmx_t qup_even = p31avgb(ne, e); 
161 
cmmx_t qdown_even = p31avgb(e, ne); 
162  
163 
cmmx_t temp_up_diff = pdiffub(qdown_even, qup_old_odd); 
164 
cmmx_t noise_up_diff = pdiffub(qdown_even, qup_odd); 
165 
cmmx_t temp_down_diff = pdiffub(qup_even, qdown_old_odd); 
166 
cmmx_t noise_down_diff = pdiffub(qup_even, qdown_odd); 
167  
168 
cmmx_t odd_diff = pdiffub(o, old_o); 
169 
m>odd += psumbw(odd_diff); 
170 
m>even += psadbw(e, *(cmmx_t*)(a+as)); 
171  
172 
temp_up_diff = pminub(temp_up_diff, temp_down_diff); 
173 
temp_up_diff = pminub(temp_up_diff, odd_diff); 
174 
m>temp += psumbw(temp_up_diff); 
175 
noise_up_diff = pminub(noise_up_diff, odd_diff); 
176 
noise_up_diff = pminub(noise_up_diff, noise_down_diff); 
177  
178 
m>noise += psumbw(noise_up_diff); 
179 
a += 2*as;

180 
b += 2*bs;

181 
} while (lines);

182 
} 
183  
184 
static inline void 
185 
get_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs, 
186 
int lines, struct metrics *m) 
187 
{ 
188 
a = as; 
189 
b = bs; 
190 
do {

191 
cmmx_t old_po = (*(cmmx_t*)(a ) >> 1) & ~SIGN_BITS;

192 
cmmx_t po = (*(cmmx_t*)(b ) >> 1) & ~SIGN_BITS;

193 
cmmx_t old_e = (*(cmmx_t*)(a + as) >> 1) & ~SIGN_BITS;

194 
cmmx_t e = (*(cmmx_t*)(b + bs) >> 1) & ~SIGN_BITS;

195 
cmmx_t old_o = (*(cmmx_t*)(a + 2*as) >> 1) & ~SIGN_BITS; 
196 
cmmx_t o = (*(cmmx_t*)(b + 2*bs) >> 1) & ~SIGN_BITS; 
197 
cmmx_t ne = (*(cmmx_t*)(b + 3*bs) >> 1) & ~SIGN_BITS; 
198 
cmmx_t old_no = (*(cmmx_t*)(a + 4*as) >> 1) & ~SIGN_BITS; 
199 
cmmx_t no = (*(cmmx_t*)(b + 4*bs) >> 1) & ~SIGN_BITS; 
200  
201 
cmmx_t qup_old_odd = p31avgb_s(old_o, old_po); 
202 
cmmx_t qup_odd = p31avgb_s( o, po); 
203 
cmmx_t qdown_old_odd = p31avgb_s(old_o, old_no); 
204 
cmmx_t qdown_odd = p31avgb_s( o, no); 
205  
206 
cmmx_t qup_even = p31avgb_s(ne, e); 
207 
cmmx_t qdown_even = p31avgb_s(e, ne); 
208  
209 
cmmx_t temp_up_diff = pdiffub_s(qdown_even, qup_old_odd); 
210 
cmmx_t noise_up_diff = pdiffub_s(qdown_even, qup_odd); 
211 
cmmx_t temp_down_diff = pdiffub_s(qup_even, qdown_old_odd); 
212 
cmmx_t noise_down_diff = pdiffub_s(qup_even, qdown_odd); 
213  
214 
cmmx_t odd_diff = pdiffub_s(o, old_o); 
215 
m>odd += psumbw_s(odd_diff) << 1;

216 
m>even += psadbw_s(e, old_e) << 1;

217  
218 
temp_up_diff = pminub_s(temp_up_diff, temp_down_diff); 
219 
temp_up_diff = pminub_s(temp_up_diff, odd_diff); 
220 
m>temp += psumbw_s(temp_up_diff) << 1;

221 
noise_up_diff = pminub_s(noise_up_diff, odd_diff); 
222 
noise_up_diff = pminub_s(noise_up_diff, noise_down_diff); 
223  
224 
m>noise += psumbw_s(noise_up_diff) << 1;

225 
a += 2*as;

226 
b += 2*bs;

227 
} while (lines);

228 
} 
229  
230 
static inline void 
231 
get_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, 
232 
int lines, struct metrics *m) 
233 
{ 
234 
a = as; 
235 
b = bs; 
236 
do {

237 
cmmx_t old_po = (*(cmmx_t*)(a )>>1) & ~SIGN_BITS;

238 
cmmx_t po = (*(cmmx_t*)(b )>>1) & ~SIGN_BITS;

239 
cmmx_t old_e = (*(cmmx_t*)(a + as)>>1) & ~SIGN_BITS;

240 
cmmx_t e = (*(cmmx_t*)(b + bs)>>1) & ~SIGN_BITS;

241 
cmmx_t old_o = (*(cmmx_t*)(a + 2*as)>>1) & ~SIGN_BITS; 
242 
cmmx_t o = (*(cmmx_t*)(b + 2*bs)>>1) & ~SIGN_BITS; 
243 
cmmx_t ne = (*(cmmx_t*)(b + 3*bs)>>1) & ~SIGN_BITS; 
244  
245 
cmmx_t down_even = p31avgb_s(e, ne); 
246 
cmmx_t up_odd = p31avgb_s(o, po); 
247 
cmmx_t up_old_odd = p31avgb_s(old_o, old_po); 
248  
249 
cmmx_t odd_diff = pdiffub_s(o, old_o); 
250 
cmmx_t temp_diff = pdiffub_s(down_even, up_old_odd); 
251 
cmmx_t noise_diff = pdiffub_s(down_even, up_odd); 
252  
253 
m>even += psadbw_s(e, old_e) << 1;

254 
m>odd += psumbw_s(odd_diff) << 1;

255  
256 
temp_diff = pminub_s(temp_diff, odd_diff); 
257 
noise_diff = pminub_s(noise_diff, odd_diff); 
258  
259 
m>noise += psumbw_s(noise_diff) << 1;

260 
m>temp += psumbw_s(temp_diff) << 1;

261 
a += 2*as;

262 
b += 2*bs;

263 
} while (lines);

264  
265 
} 
266  
267 
static inline void 
268 
get_block_stats(struct metrics *m, struct vf_priv_s *p, struct frame_stats *s) 
269 
{ 
270 
unsigned two_e = m>even + MAX(m>even , p>thres.even );

271 
unsigned two_o = m>odd + MAX(m>odd , p>thres.odd );

272 
unsigned two_n = m>noise + MAX(m>noise, p>thres.noise);

273 
unsigned two_t = m>temp + MAX(m>temp , p>thres.temp );

274  
275 
unsigned e_big = m>even >= (m>odd + two_o + 1)/2; 
276 
unsigned o_big = m>odd >= (m>even + two_e + 1)/2; 
277 
unsigned n_big = m>noise >= (m>temp + two_t + 1)/2; 
278 
unsigned t_big = m>temp >= (m>noise + two_n + 1)/2; 
279  
280 
unsigned e2x = m>even >= two_o;

281 
unsigned o2x = m>odd >= two_e;

282 
unsigned n2x = m>noise >= two_t;

283 
unsigned t2x = m>temp >= two_n;

284  
285 
unsigned ntiny_e = m>even > p>thres.even ;

286 
unsigned ntiny_o = m>odd > p>thres.odd ;

287 
unsigned ntiny_n = m>noise > p>thres.noise;

288 
unsigned ntiny_t = m>temp > p>thres.temp ;

289  
290 
unsigned nlow_e = m>even > 2*p>thres.even ; 
291 
unsigned nlow_o = m>odd > 2*p>thres.odd ; 
292 
unsigned nlow_n = m>noise > 2*p>thres.noise; 
293 
unsigned nlow_t = m>temp > 2*p>thres.temp ; 
294  
295 
unsigned high_e = m>even > 4*p>thres.even ; 
296 
unsigned high_o = m>odd > 4*p>thres.odd ; 
297 
unsigned high_n = m>noise > 4*p>thres.noise; 
298 
unsigned high_t = m>temp > 4*p>thres.temp ; 
299  
300 
unsigned low_il = !n_big && !t_big && ntiny_n && ntiny_t;

301 
unsigned high_il = !n_big && !t_big && nlow_n && nlow_t;

302  
303 
if (low_il  high_il) {

304 
s>interlaced_low += low_il; 
305 
s>interlaced_high += high_il; 
306 
} else {

307 
s>tiny.even += ntiny_e; 
308 
s>tiny.odd += ntiny_o; 
309 
s>tiny.noise += ntiny_n; 
310 
s>tiny.temp += ntiny_t; 
311  
312 
s>low .even += nlow_e ; 
313 
s>low .odd += nlow_o ; 
314 
s>low .noise += nlow_n ; 
315 
s>low .temp += nlow_t ; 
316  
317 
s>high.even += high_e ; 
318 
s>high.odd += high_o ; 
319 
s>high.noise += high_n ; 
320 
s>high.temp += high_t ; 
321  
322 
if (m>even >= p>sad_thres) s>sad.even += m>even ;

323 
if (m>odd >= p>sad_thres) s>sad.odd += m>odd ;

324 
if (m>noise >= p>sad_thres) s>sad.noise += m>noise;

325 
if (m>temp >= p>sad_thres) s>sad.temp += m>temp ;

326 
} 
327 
s>num_blocks++; 
328 
s>max.even = MAX(s>max.even , m>even ); 
329 
s>max.odd = MAX(s>max.odd , m>odd ); 
330 
s>max.noise = MAX(s>max.noise, m>noise); 
331 
s>max.temp = MAX(s>max.temp , m>temp ); 
332  
333 
s>bigger.even += e_big ; 
334 
s>bigger.odd += o_big ; 
335 
s>bigger.noise += n_big ; 
336 
s>bigger.temp += t_big ; 
337  
338 
s>twox.even += e2x ; 
339 
s>twox.odd += o2x ; 
340 
s>twox.noise += n2x ; 
341 
s>twox.temp += t2x ; 
342  
343 
} 
344  
345 
static inline struct metrics 
346 
block_metrics_c(unsigned char *a, unsigned char *b, int as, int bs, 
347 
int lines, struct vf_priv_s *p, struct frame_stats *s) 
348 
{ 
349 
struct metrics tm;

350 
tm.even = tm.odd = tm.noise = tm.temp = 0;

351 
get_metrics_c(a, b, as, bs, lines, &tm); 
352 
if (sizeof(cmmx_t) < 8) 
353 
get_metrics_c(a+4, b+4, as, bs, lines, &tm); 
354 
get_block_stats(&tm, p, s); 
355 
return tm;

356 
} 
357  
358 
static inline struct metrics 
359 
block_metrics_fast_c(unsigned char *a, unsigned char *b, int as, int bs, 
360 
int lines, struct vf_priv_s *p, struct frame_stats *s) 
361 
{ 
362 
struct metrics tm;

363 
tm.even = tm.odd = tm.noise = tm.temp = 0;

364 
get_metrics_fast_c(a, b, as, bs, lines, &tm); 
365 
if (sizeof(cmmx_t) < 8) 
366 
get_metrics_fast_c(a+4, b+4, as, bs, lines, &tm); 
367 
get_block_stats(&tm, p, s); 
368 
return tm;

369 
} 
370  
371 
static inline struct metrics 
372 
block_metrics_faster_c(unsigned char *a, unsigned char *b, int as, int bs, 
373 
int lines, struct vf_priv_s *p, struct frame_stats *s) 
374 
{ 
375 
struct metrics tm;

376 
tm.even = tm.odd = tm.noise = tm.temp = 0;

377 
get_metrics_faster_c(a, b, as, bs, lines, &tm); 
378 
if (sizeof(cmmx_t) < 8) 
379 
get_metrics_faster_c(a+4, b+4, as, bs, lines, &tm); 
380 
get_block_stats(&tm, p, s); 
381 
return tm;

382 
} 
383  
384 
/* True when two metrics-like objects agree in all four members.  Both
 * arguments are evaluated several times. */
#define MEQ(X,Y) ((X).even == (Y).even && (X).odd == (Y).odd && (X).temp == (Y).temp && (X).noise == (Y).noise)

385  
386 
/*
 * Assembly counterpart of get_metrics_c(), shared by the 3DNow! and MMX2
 * functions; the instructions actually emitted depend on the PSADBW, PAVGB,
 * PMINUBT, PSUMBW and PDIFFUBT definitions in effect at the point of use.
 *
 * Expects the enclosing function to provide: a, b (block pointers), as, bs
 * (strides), lines (>= 1, counted down to zero) and a 64-bit constant "ones"
 * with every byte set to 1.  a, b and lines are modified.
 *
 * On exit mm7 holds the four sums in struct metrics layout (even, odd,
 * noise, temp from the lowest word upwards) and mm6 is zero.  The caller is
 * responsible for storing mm7 and executing emms.
 */
#define BLOCK_METRICS_TEMPLATE() \
    __asm__ volatile("pxor %mm7, %mm7\n\t"   /* The result is collected in mm7 */ \
                     "pxor %mm6, %mm6\n\t"   /* Temp to stay at 0 */ \
        ); \
    a -= as; \
    b -= bs; \
    do { \
        __asm__ volatile( \
            "movq (%0,%2), %%mm0\n\t" \
            "movq (%1,%3), %%mm1\n\t"   /* mm1 = even */ \
            PSADBW(%%mm1, %%mm0, %%mm4, %%mm6) \
            "paddusw %%mm0, %%mm7\n\t"  /* even diff */ \
            "movq (%0,%2,2), %%mm0\n\t" /* mm0 = old odd */ \
            "movq (%1,%3,2), %%mm2\n\t" /* mm2 = odd */ \
            "movq (%0), %%mm3\n\t" \
            "psubusb %4, %%mm3\n\t" \
            PAVGB(%%mm0, %%mm3) \
            PAVGB(%%mm0, %%mm3)    /* mm3 = qup old odd */ \
            "movq %%mm0, %%mm5\n\t" \
            PSADBW(%%mm2, %%mm0, %%mm4, %%mm6) \
            "psllq $16, %%mm0\n\t" \
            "paddusw %%mm0, %%mm7\n\t" \
            "movq (%1), %%mm4\n\t" \
            "lea (%0,%2,2), %0\n\t" \
            "lea (%1,%3,2), %1\n\t" \
            "psubusb %4, %%mm4\n\t" \
            PAVGB(%%mm2, %%mm4) \
            PAVGB(%%mm2, %%mm4)    /* mm4 = qup odd */ \
            PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 =abs(oldodd-odd) */ \
            "movq (%1,%3), %%mm5\n\t" \
            "psubusb %4, %%mm5\n\t" \
            PAVGB(%%mm1, %%mm5) \
            PAVGB(%%mm5, %%mm1)    /* mm1 = qdown even */ \
            PAVGB((%1,%3), %%mm5)  /* mm5 = qup next even */ \
            PDIFFUBT(%%mm1, %%mm3, %%mm0) /* mm3 = abs(qupoldo-qde) */ \
            PDIFFUBT(%%mm1, %%mm4, %%mm0) /* mm4 = abs(qupodd-qde) */ \
            PMINUBT(%%mm2, %%mm3, %%mm0)  /* limit temp to odd diff */ \
            PMINUBT(%%mm2, %%mm4, %%mm0)  /* limit noise to odd diff */ \
            "movq (%1,%3,2), %%mm2\n\t" \
            "psubusb %4, %%mm2\n\t" \
            PAVGB((%1), %%mm2) \
            PAVGB((%1), %%mm2)    /* mm2 = qdown odd */ \
            "movq (%0,%2,2), %%mm1\n\t" \
            "psubusb %4, %%mm1\n\t" \
            PAVGB((%0), %%mm1) \
            PAVGB((%0), %%mm1)  /* mm1 = qdown old odd */ \
            PDIFFUBT(%%mm5, %%mm2, %%mm0) /* mm2 = abs(qdo-qune) */ \
            PDIFFUBT(%%mm5, %%mm1, %%mm0) /* mm1 = abs(qdoo-qune) */ \
            PMINUBT(%%mm4, %%mm2, %%mm0)  /* current */ \
            PMINUBT(%%mm3, %%mm1, %%mm0)  /* old */ \
            PSUMBW(%%mm2, %%mm0, %%mm6) \
            PSUMBW(%%mm1, %%mm0, %%mm6) \
            "psllq $32, %%mm2\n\t" \
            "psllq $48, %%mm1\n\t" \
            "paddusw %%mm2, %%mm7\n\t" \
            "paddusw %%mm1, %%mm7\n\t" \
            : "=r" (a), "=r" (b) \
            : "r"((x86_reg)as), "r"((x86_reg)bs), "m" (ones), "0"(a), "1"(b), "X"(*a), "X"(*b) \
            ); \
    } while (--lines);

446  
447 
static inline struct metrics 
448 
block_metrics_3dnow(unsigned char *a, unsigned char *b, int as, int bs, 
449 
int lines, struct vf_priv_s *p, struct frame_stats *s) 
450 
{ 
451 
struct metrics tm;

452 
#if !HAVE_AMD3DNOW

453 
mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_3dnow: internal error\n");

454 
#else

455 
static const unsigned long long ones = 0x0101010101010101ull; 
456  
457 
BLOCK_METRICS_TEMPLATE(); 
458 
__asm__ volatile("movq %%mm7, %0\n\temms" : "=m" (tm)); 
459 
get_block_stats(&tm, p, s); 
460 
#endif

461 
return tm;

462 
} 
463  
464 
/*
 * Switch the helper macros to their single-instruction MMX2 forms so that
 * the next expansion of BLOCK_METRICS_TEMPLATE uses psadbw, pmaxub, pminub
 * and pavgb.  The scratch operands T and Z are accepted but unused (PSUMBW
 * still needs Z to be zero: it computes the SAD of X against Z).
 */
#undef PSUMBW
#undef PSADBW
#undef PMAXUB
#undef PMINUBT
#undef PAVGB

#define PSUMBW(X,T,Z)   "psadbw " #Z "," #X "\n\t"
#define PSADBW(X,Y,T,Z) "psadbw " #X "," #Y "\n\t"
#define PMAXUB(X,Y)     "pmaxub " #X "," #Y "\n\t"
#define PMINUBT(X,Y,T)  "pminub " #X "," #Y "\n\t"
#define PAVGB(X,Y)      "pavgb "  #X "," #Y "\n\t"
475  
476 
static inline struct metrics 
477 
block_metrics_mmx2(unsigned char *a, unsigned char *b, int as, int bs, 
478 
int lines, struct vf_priv_s *p, struct frame_stats *s) 
479 
{ 
480 
struct metrics tm;

481 
#if !HAVE_MMX

482 
mp_msg(MSGT_VFILTER, MSGL_FATAL, "block_metrics_mmx2: internal error\n");

483 
#else

484 
static const unsigned long long ones = 0x0101010101010101ull; 
485 
x86_reg interlaced; 
486 
x86_reg prefetch_line = (((long)a>>3) & 7) + 10; 
487 
#ifdef DEBUG

488 
struct frame_stats ts = *s;

489 
#endif

490 
__asm__ volatile("prefetcht0 (%0,%2)\n\t" 
491 
"prefetcht0 (%1,%3)\n\t" :

492 
: "r" (a), "r" (b), 
493 
"r" (prefetch_line * as), "r" (prefetch_line * bs)); 
494  
495 
BLOCK_METRICS_TEMPLATE(); 
496  
497 
s>num_blocks++; 
498 
__asm__ volatile(

499 
"movq %3, %%mm0\n\t"

500 
"movq %%mm7, %%mm1\n\t"

501 
"psubusw %%mm0, %%mm1\n\t"

502 
"movq %%mm1, %%mm2\n\t"

503 
"paddusw %%mm0, %%mm2\n\t"

504 
"paddusw %%mm7, %%mm2\n\t"

505 
"pshufw $0xb1, %%mm2, %%mm3\n\t"

506 
"pavgw %%mm7, %%mm2\n\t"

507 
"pshufw $0xb1, %%mm2, %%mm2\n\t"

508 
"psubusw %%mm7, %%mm2\n\t"

509 
"pcmpeqw %%mm6, %%mm2\n\t" /* 1 if >= 1.5x */ 
510 
"psubusw %%mm7, %%mm3\n\t"

511 
"pcmpeqw %%mm6, %%mm3\n\t" /* 1 if >= 2x */ 
512 
"movq %1, %%mm4\n\t"

513 
"movq %2, %%mm5\n\t"

514 
"psubw %%mm2, %%mm4\n\t"

515 
"psubw %%mm3, %%mm5\n\t"

516 
"movq %%mm4, %1\n\t"

517 
"movq %%mm5, %2\n\t"

518 
"pxor %%mm4, %%mm4\n\t"

519 
"pcmpeqw %%mm1, %%mm4\n\t" /* 1 if <= t */ 
520 
"psubusw %%mm0, %%mm1\n\t"

521 
"pxor %%mm5, %%mm5\n\t"

522 
"pcmpeqw %%mm1, %%mm5\n\t" /* 1 if <= 2t */ 
523 
"psubusw %%mm0, %%mm1\n\t"

524 
"psubusw %%mm0, %%mm1\n\t"

525 
"pcmpeqw %%mm6, %%mm1\n\t" /* 1 if <= 4t */ 
526 
"pshufw $0xb1, %%mm2, %%mm0\n\t"

527 
"por %%mm2, %%mm0\n\t" /* 1 if not close */ 
528 
"punpckhdq %%mm0, %%mm0\n\t"

529 
"movq %%mm4, %%mm2\n\t" /* tttt */ 
530 
"punpckhdq %%mm5, %%mm2\n\t" /* ttll */ 
531 
"por %%mm2, %%mm0\n\t"

532 
"pcmpeqd %%mm6, %%mm0\n\t" /* close && big */ 
533 
"psrlq $16, %%mm0\n\t"

534 
"psrlw $15, %%mm0\n\t"

535 
"movd %%mm0, %0\n\t"

536 
: "=r" (interlaced), "=m" (s>bigger), "=m" (s>twox) 
537 
: "m" (p>thres)

538 
); 
539  
540 
if (interlaced) {

541 
s>interlaced_high += interlaced >> 16;

542 
s>interlaced_low += interlaced; 
543 
} else {

544 
__asm__ volatile(

545 
"pcmpeqw %%mm0, %%mm0\n\t" /* 1 */ 
546 
"psubw %%mm0, %%mm4\n\t"

547 
"psubw %%mm0, %%mm5\n\t"

548 
"psubw %%mm0, %%mm1\n\t"

549 
"paddw %0, %%mm4\n\t"

550 
"paddw %1, %%mm5\n\t"

551 
"paddw %2, %%mm1\n\t"

552 
"movq %%mm4, %0\n\t"

553 
"movq %%mm5, %1\n\t"

554 
"movq %%mm1, %2\n\t"

555 
: "=m" (s>tiny), "=m" (s>low), "=m" (s>high) 
556 
); 
557  
558 
__asm__ volatile(

559 
"pshufw $0, %2, %%mm0\n\t"

560 
"psubusw %%mm7, %%mm0\n\t"

561 
"pcmpeqw %%mm6, %%mm0\n\t" /* 0 if below sad_thres */ 
562 
"pand %%mm7, %%mm0\n\t"

563 
"movq %%mm0, %%mm1\n\t"

564 
"punpcklwd %%mm6, %%mm0\n\t" /* sad even, odd */ 
565 
"punpckhwd %%mm6, %%mm1\n\t" /* sad noise, temp */ 
566 
"paddd %0, %%mm0\n\t"

567 
"paddd %1, %%mm1\n\t"

568 
"movq %%mm0, %0\n\t"

569 
"movq %%mm1, %1\n\t"

570 
: "=m" (s>sad.even), "=m" (s>sad.noise) 
571 
: "m" (p>sad_thres)

572 
); 
573 
} 
574  
575 
__asm__ volatile(

576 
"movq %%mm7, (%1)\n\t"

577 
PMAXUW((%0), %%mm7)

578 
"movq %%mm7, (%0)\n\t"

579 
"emms"

580 
: : "r" (&s>max), "r" (&tm), "X" (s>max) 
581 
: "memory"

582 
); 
583 
#ifdef DEBUG

584 
if (1) { 
585 
struct metrics cm;

586 
a = 7*as;

587 
b = 7*bs;

588 
cm = block_metrics_c(a, b, as, bs, 4, p, &ts);

589 
if (!MEQ(tm, cm))

590 
mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad metrics\n");

591 
if (s) {

592 
# define CHECK(X) if (!MEQ(s>X, ts.X)) \ 
593 
mp_msg(MSGT_VFILTER, MSGL_WARN, "Bad " #X "\n"); 
594 
CHECK(tiny); 
595 
CHECK(low); 
596 
CHECK(high); 
597 
CHECK(sad); 
598 
CHECK(max); 
599 
} 
600 
} 
601 
#endif

602 
#endif

603 
return tm;

604 
} 
605  
606 
static inline int 
607 
dint_copy_line_mmx2(unsigned char *dst, unsigned char *a, long bos, 
608 
long cos, int ds, int ss, int w, int t) 
609 
{ 
610 
#if !HAVE_MMX

611 
mp_msg(MSGT_VFILTER, MSGL_FATAL, "dint_copy_line_mmx2: internal error\n");

612 
return 0; 
613 
#else

614 
unsigned long len = (w+7) >> 3; 
615 
int ret;

616 
__asm__ volatile (

617 
"pxor %%mm6, %%mm6 \n\t" /* deinterlaced pixel counter */ 
618 
"movd %0, %%mm7 \n\t"

619 
"punpcklbw %%mm7, %%mm7 \n\t"

620 
"punpcklwd %%mm7, %%mm7 \n\t"

621 
"punpckldq %%mm7, %%mm7 \n\t" /* mm7 = threshold */ 
622 
: /* no output */

623 
: "rm" (t)

624 
); 
625 
do {

626 
__asm__ volatile (

627 
"movq (%0), %%mm0\n\t"

628 
"movq (%0,%3,2), %%mm1\n\t"

629 
"movq %%mm0, (%2)\n\t"

630 
"pmaxub %%mm1, %%mm0\n\t"

631 
"pavgb (%0), %%mm1\n\t"

632 
"psubusb %%mm1, %%mm0\n\t"

633 
"paddusb %%mm7, %%mm0\n\t" /* mm0 = maxavg+thr */ 
634 
"movq (%0,%1), %%mm2\n\t"

635 
"movq (%0,%5), %%mm3\n\t"

636 
"movq %%mm2, %%mm4\n\t"

637 
PDIFFUBT(%%mm1, %%mm2, %%mm5) 
638 
PDIFFUBT(%%mm1, %%mm3, %%mm5) 
639 
"pminub %%mm2, %%mm3\n\t"

640 
"pcmpeqb %%mm3, %%mm2\n\t" /* b = min */ 
641 
"pand %%mm2, %%mm4\n\t"

642 
"pandn (%0,%5), %%mm2\n\t"

643 
"por %%mm4, %%mm2\n\t"

644 
"pminub %%mm0, %%mm3\n\t"

645 
"pcmpeqb %%mm0, %%mm3\n\t" /* set to 1s if >= threshold */ 
646 
"psubb %%mm3, %%mm6\n\t" /* count pixels above thr. */ 
647 
"pand %%mm3, %%mm1 \n\t"

648 
"pandn %%mm2, %%mm3 \n\t"

649 
"por %%mm3, %%mm1 \n\t" /* avg if >= threshold */ 
650 
"movq %%mm1, (%2,%4) \n\t"

651 
: /* no output */

652 
: "r" (a), "r" ((x86_reg)bos), "r" ((x86_reg)dst), "r" ((x86_reg)ss), "r" ((x86_reg)ds), "r" ((x86_reg)cos) 
653 
); 
654 
a += 8;

655 
dst += 8;

656 
} while (len);

657  
658 
__asm__ volatile ("pxor %%mm7, %%mm7 \n\t" 
659 
"psadbw %%mm6, %%mm7 \n\t"

660 
"movd %%mm7, %0 \n\t"

661 
"emms \n\t"

662 
: "=r" (ret)

663 
); 
664 
return ret;

665 
#endif

666 
} 
667  
668 
static inline int 
669 
dint_copy_line(unsigned char *dst, unsigned char *a, long bos, 
670 
long cos, int ds, int ss, int w, int t) 
671 
{ 
672 
unsigned long len = ((unsigned long)w+sizeof(cmmx_t)1) / sizeof(cmmx_t); 
673 
cmmx_t dint_count = 0;

674 
cmmx_t thr; 
675 
t = t << 8;

676 
thr = t  (t << 16);

677 
if (sizeof(cmmx_t) > 4) 
678 
thr = thr << (sizeof(cmmx_t)*4); 
679 
do {

680 
cmmx_t e = *(cmmx_t*)a; 
681 
cmmx_t ne = *(cmmx_t*)(a+2*ss);

682 
cmmx_t o = *(cmmx_t*)(a+bos); 
683 
cmmx_t oo = *(cmmx_t*)(a+cos); 
684 
cmmx_t maxe = pmaxub(e, ne); 
685 
cmmx_t avge = pavgb(e, ne); 
686 
cmmx_t max_diff = maxe  avge + thr; /* 0<=maxavg<128, thr<128 */

687 
cmmx_t diffo = pdiffub(avge, o); 
688 
cmmx_t diffoo = pdiffub(avge, oo); 
689 
cmmx_t diffcmp = pcmpgtub(diffo, diffoo); 
690 
cmmx_t bo = ((oo ^ o) & diffcmp) ^ o; 
691 
cmmx_t diffbo = ((diffoo ^ diffo) & diffcmp) ^ diffo; 
692 
cmmx_t above_thr = ~pcmpgtub(max_diff, diffbo); 
693 
cmmx_t bo_or_avg = ((avge ^ bo) & above_thr) ^ bo; 
694 
dint_count += above_thr & ONE_BYTES; 
695 
*(cmmx_t*)(dst) = e; 
696 
*(cmmx_t*)(dst+ds) = bo_or_avg; 
697 
a += sizeof(cmmx_t);

698 
dst += sizeof(cmmx_t);

699 
} while (len);

700 
return psumbw(dint_count);

701 
} 
702  
703 
/*
 * Copy one plane to the output, weaving lines from a with lines from b and
 * optionally deinterlacing.
 *
 * d, ds     - destination plane and stride
 * a         - plane supplying the lines that are copied unchanged
 * b, c      - planes supplying the two candidates for the lines in between
 * w, h      - width in bytes and height in lines
 * ss        - stride shared by a, b and c
 * threshold - >= 128 means plain weave of a and b; smaller values enable
 *             the per-pixel check in dint_copy_line()/dint_copy_line_mmx2()
 * field     - non-zero: the first output line is taken from b and the
 *             pairing starts one line further down
 * mmx2      - 1 selects the MMX2 line function
 *
 * Returns the number of pixels that were deinterlaced.
 * NOTE(review): h is unsigned; h == 0 together with a non-zero field would
 * wrap around in "h--" -- callers appear to pass real plane heights.
 */
static int
dint_copy_plane(unsigned char *d, unsigned char *a, unsigned char *b,
                unsigned char *c, unsigned long w, unsigned long h,
                unsigned long ds, unsigned long ss, unsigned long threshold,
                long field, long mmx2)
{
    unsigned long ret = 0;
    long bos = b - a;
    long cos = c - a;
    if (field) {
        fast_memcpy(d, b, w);
        h--;
        d += ds;
        a += ss;
    }
    /* from here on the candidates are the lines directly below a */
    bos += ss;
    cos += ss;
    while (h > 2) {
        if (threshold >= 128) {
            fast_memcpy(d, a, w);
            fast_memcpy(d+ds, a+bos, w);
        } else if (mmx2 == 1) {
            ret += dint_copy_line_mmx2(d, a, bos, cos, ds, ss, w, threshold);
        } else
            ret += dint_copy_line(d, a, bos, cos, ds, ss, w, threshold);
        h -= 2;
        d += 2*ds;
        a += 2*ss;
    }
    /* last one or two lines: nothing below to average with, so just copy */
    fast_memcpy(d, a, w);
    if (h == 2)
        fast_memcpy(d+ds, a+bos, w);
    return ret;
}
737  
738 
static void 
739 
copy_merge_fields(struct vf_priv_s *p, mp_image_t *dmpi,

740 
unsigned char **old, unsigned char **new, unsigned long show) 
741 
{ 
742 
unsigned long threshold = 256; 
743 
unsigned long field = p>swapped; 
744 
unsigned long dint_pixels = 0; 
745 
unsigned char **other = old; 
746 
if (show >= 12  !(show & 3)) 
747 
show >>= 2, other = new, new = old;

748 
if (show <= 2) { /* Single field: deinterlace */ 
749 
threshold = p>dint_thres; 
750 
field ^= show & 1;

751 
old = new; 
752 
} else if (show == 3) 
753 
old = new; 
754 
else

755 
field ^= 1;

756 
dint_pixels +=dint_copy_plane(dmpi>planes[0], old[0], new[0], 
757 
other[0], p>w, p>h, dmpi>stride[0], 
758 
p>stride, threshold, field, p>mmx2); 
759 
if (dmpi>flags & MP_IMGFLAG_PLANAR) {

760 
if (p>luma_only)

761 
old = new, other = new; 
762 
else

763 
threshold = threshold/2 + 1; 
764 
field ^= p>chroma_swapped; 
765 
dint_copy_plane(dmpi>planes[1], old[1], new[1], 
766 
other[1], p>cw, p>ch, dmpi>stride[1], 
767 
p>chroma_stride, threshold, field, p>mmx2); 
768 
dint_copy_plane(dmpi>planes[2], old[2], new[2], 
769 
other[2], p>cw, p>ch, dmpi>stride[2], 
770 
p>chroma_stride, threshold, field, p>mmx2); 
771 
} 
772 
if (dint_pixels > 0 && p>verbose) 
773 
mp_msg(MSGT_VFILTER,MSGL_INFO,"Deinterlaced %lu pixels\n",dint_pixels);

774 
} 
775  
776 
static void diff_planes(struct vf_priv_s *p, struct frame_stats *s, 
777 
unsigned char *of, unsigned char *nf, 
778 
int w, int h, int os, int ns, int swapped) 
779 
{ 
780 
int i, y;

781 
int align = (long)nf & 7; 
782 
of += align; 
783 
nf += align; 
784 
w = align; 
785 
if (swapped)

786 
of = os, nf = ns; 
787 
i = (h*3 >> 7) & ~1; 
788 
of += i*os + 8;

789 
nf += i*ns + 8;

790 
h = i; 
791 
w = 16;

792  
793 
memset(s, 0, sizeof(*s)); 
794  
795 
for (y = (h8) >> 3; y; y) { 
796 
if (p>mmx2 == 1) { 
797 
for (i = 0; i < w; i += 8) 
798 
block_metrics_mmx2(of+i, nf+i, os, ns, 4, p, s);

799 
} else if (p>mmx2 == 2) { 
800 
for (i = 0; i < w; i += 8) 
801 
block_metrics_3dnow(of+i, nf+i, os, ns, 4, p, s);

802 
} else if (p>fast > 3) { 
803 
for (i = 0; i < w; i += 8) 
804 
block_metrics_faster_c(of+i, nf+i, os, ns, 4, p, s);

805 
} else if (p>fast > 1) { 
806 
for (i = 0; i < w; i += 8) 
807 
block_metrics_fast_c(of+i, nf+i, os, ns, 4, p, s);

808 
} else {

809 
for (i = 0; i < w; i += 8) 
810 
block_metrics_c(of+i, nf+i, os, ns, 4, p, s);

811 
} 
812 
of += 8*os;

813 
nf += 8*ns;

814 
} 
815 
} 
816  
817 
/* Expands to the four members of a metrics-like object as a comma separated
 * list, for use as printf-style arguments. */
#define METRICS(X) (X).even, (X).odd, (X).noise, (X).temp

818  
819 
static void diff_fields(struct vf_priv_s *p, struct frame_stats *s, 
820 
unsigned char **old, unsigned char **new) 
821 
{ 
822 
diff_planes(p, s, old[0], new[0], p>w, p>h, 
823 
p>stride, p>stride, p>swapped); 
824 
s>sad.even = (s>sad.even * 16ul) / s>num_blocks;

825 
s>sad.odd = (s>sad.odd * 16ul) / s>num_blocks;

826 
s>sad.noise = (s>sad.noise * 16ul) / s>num_blocks;

827 
s>sad.temp = (s>sad.temp * 16ul) / s>num_blocks;

828 
if (p>verbose)

829 
mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu%c M:%d/%d/%d/%d  %d, "

830 
"t:%d/%d/%d/%d, l:%d/%d/%d/%d, h:%d/%d/%d/%d, bg:%d/%d/%d/%d, "

831 
"2x:%d/%d/%d/%d, sad:%d/%d/%d/%d, lil:%d, hil:%d, ios:%.1f\n",

832 
p>inframes, p>chflag, METRICS(s>max), s>num_blocks, 
833 
METRICS(s>tiny), METRICS(s>low), METRICS(s>high), 
834 
METRICS(s>bigger), METRICS(s>twox), METRICS(s>sad), 
835 
s>interlaced_low, s>interlaced_high, 
836 
p>iosync / (double) p>in_inc);

837 
} 
838  
839 
static const char *parse_args(struct vf_priv_s *p, const char *args) 
840 
{ 
841 
args; 
842 
while (args && *++args &&

843 
(sscanf(args, "io=%lu:%lu", &p>out_dec, &p>in_inc) == 2  
844 
sscanf(args, "diff_thres=%hu", &p>thres.even ) == 1  
845 
sscanf(args, "comb_thres=%hu", &p>thres.noise) == 1  
846 
sscanf(args, "sad_thres=%lu", &p>sad_thres ) == 1  
847 
sscanf(args, "dint_thres=%lu", &p>dint_thres ) == 1  
848 
sscanf(args, "fast=%u", &p>fast ) == 1  
849 
sscanf(args, "mmx2=%lu", &p>mmx2 ) == 1  
850 
sscanf(args, "luma_only=%u", &p>luma_only ) == 1  
851 
sscanf(args, "verbose=%u", &p>verbose ) == 1  
852 
sscanf(args, "crop=%lu:%lu:%lu:%lu", &p>w,

853 
&p>h, &p>crop_x, &p>crop_y) == 4))

854 
args = strchr(args, '/');

855 
return args;

856 
} 
857  
858 
/*
 * Greatest common divisor of x and y (Euclid's algorithm).
 * gcd(0, n) and gcd(n, 0) are n; gcd(0, 0) is 0.
 */
static unsigned long gcd(unsigned long x, unsigned long y)
{
    unsigned long t;
    /* start with the smaller value in x */
    if (x > y)
        t = x, x = y, y = t;

    while (x) {
        t = y % x;
        y = x;
        x = t;
    }
    return y;
}
871  
872 
/*
 * One-time set-up, called from get_image() for the first image.
 *
 * Derives strides and chroma geometry from mpi, allocates one block of
 * memory for NUM_STORED frames and fills p->planes[]: entries 0 ..
 * NUM_STORED-1 point at the start of each plane, entries NUM_STORED ..
 * 2*NUM_STORED-1 at the same planes offset by the crop origin.  Luma is
 * cleared to 0 and chroma to 0x80.  Finally the in/out ratio given with
 * "io=" is scaled and reduced by its gcd.
 *
 * NOTE(review): the malloc() result is not checked before it is used.
 */
static void init(struct vf_priv_s *p, mp_image_t *mpi)
{
    unsigned long i;
    unsigned long plane_size, chroma_plane_size;
    unsigned char *plane;
    unsigned long cos, los;
    p->crop_cx = p->crop_x >> mpi->chroma_x_shift;
    p->crop_cy = p->crop_y >> mpi->chroma_y_shift;
    if (mpi->flags & MP_IMGFLAG_ACCEPT_STRIDE) {
        p->stride = (mpi->w + 15) & ~15;
        p->chroma_stride = p->stride >> mpi->chroma_x_shift;
    } else {
        p->stride = mpi->width;
        p->chroma_stride = mpi->chroma_width;
    }
    p->cw = p->w >> mpi->chroma_x_shift;
    p->ch = p->h >> mpi->chroma_y_shift;
    p->nplanes = 1;
    p->static_idx = 0;
    p->temp_idx = 0;
    p->old_planes = p->planes[0];
    plane_size = mpi->h * p->stride;
    chroma_plane_size = mpi->flags & MP_IMGFLAG_PLANAR ?
        mpi->chroma_height * p->chroma_stride : 0;
    /* 4096 extra bytes leave room for the alignment below */
    p->memory_allocated =
        malloc(NUM_STORED * (plane_size+2*chroma_plane_size) +
               8*p->chroma_stride + 4096);
    /* align to page boundary */
    plane = p->memory_allocated + (-(long)p->memory_allocated & 4095);
    memset(plane, 0, NUM_STORED * plane_size);
    los = p->crop_x  + p->crop_y  * p->stride;
    cos = p->crop_cx + p->crop_cy * p->chroma_stride;
    for (i = 0; i != NUM_STORED; i++, plane += plane_size) {
        p->planes[i][0] = plane;
        p->planes[NUM_STORED + i][0] = plane + los;
    }
    if (mpi->flags & MP_IMGFLAG_PLANAR) {
        p->nplanes = 3;
        /* plane now points just behind the luma planes */
        memset(plane, 0x80, NUM_STORED * 2 * chroma_plane_size);
        for (i = 0; i != NUM_STORED; i++) {
            p->planes[i][1] = plane;
            p->planes[NUM_STORED + i][1] = plane + cos;
            plane += chroma_plane_size;
            p->planes[i][2] = plane;
            p->planes[NUM_STORED + i][2] = plane + cos;
            plane += chroma_plane_size;
        }
    }
    p->out_dec <<= 2;
    i = gcd(p->in_inc, p->out_dec);
    p->in_inc  /= i;
    p->out_dec /= i;
    p->iosync = 0;
    p->num_fields = 3;
}
927  
928 
/* Current wall-clock time from gettimeofday(), in seconds with microsecond
 * resolution. */
static inline double get_time(void)
{
    struct timeval tv;
    gettimeofday(&tv, 0);
    /* tv_usec counts microseconds, so scale it down to seconds */
    return tv.tv_sec + tv.tv_usec * 1e-6;
}
934  
935 
/*
 * Direct-rendering hook: hand one of the filter's own frame buffers to the
 * decoder.
 *
 * Static images are left alone.  On the first call the buffers are set up
 * through init().  Temporary images (and IPB images that need not be
 * readable) rotate through the upper half of the stored buffers, everything
 * else through the lower half.  If the chosen buffer is the one currently
 * referenced by p->old_planes, its contents are first copied to one of the
 * upper-half buffers and p->old_planes is redirected there.
 *
 * On return mpi->planes/stride point at the chosen buffer, mpi->priv at the
 * crop-adjusted plane pointers of the same buffer, and the image is flagged
 * as direct without a draw callback.
 */
static void get_image(struct vf_instance *vf, mp_image_t *mpi)
{
    struct vf_priv_s *p = vf->priv;
    static unsigned char **planes, planes_idx;

    if (mpi->type == MP_IMGTYPE_STATIC) return;

    if (!p->planes[0][0]) init(p, mpi);

    if (mpi->type == MP_IMGTYPE_TEMP ||
        (mpi->type == MP_IMGTYPE_IPB && !(mpi->flags & MP_IMGFLAG_READABLE)))
        planes_idx = NUM_STORED/2 + (++p->temp_idx % (NUM_STORED/2));
    else
        planes_idx = ++p->static_idx % (NUM_STORED/2);
    planes = p->planes[planes_idx];
    mpi->priv = p->planes[NUM_STORED + planes_idx];
    if (mpi->priv == p->old_planes) {
        /* the decoder is about to overwrite the frame we still need */
        unsigned char **old_planes =
            p->planes[NUM_STORED + 2 + (++p->temp_idx & 1)];
        my_memcpy_pic(old_planes[0], p->old_planes[0],
                      p->w, p->h, p->stride, p->stride);
        if (mpi->flags & MP_IMGFLAG_PLANAR) {
            my_memcpy_pic(old_planes[1], p->old_planes[1],
                          p->cw, p->ch, p->chroma_stride, p->chroma_stride);
            my_memcpy_pic(old_planes[2], p->old_planes[2],
                          p->cw, p->ch, p->chroma_stride, p->chroma_stride);
        }
        p->old_planes = old_planes;
        p->num_copies++;
    }
    mpi->planes[0] = planes[0];
    mpi->stride[0] = p->stride;
    if (mpi->flags & MP_IMGFLAG_PLANAR) {
        mpi->planes[1] = planes[1];
        mpi->planes[2] = planes[2];
        mpi->stride[1] = mpi->stride[2] = p->chroma_stride;
    }
    mpi->width = p->stride;

    mpi->flags |= MP_IMGFLAG_DIRECT;
    mpi->flags &= ~MP_IMGFLAG_DRAW_CALLBACK;
}
977  
978 
/*
 * Compare x and y with a tolerance that scales with their magnitude.
 *
 * The tolerance is unit = (x + y + err) >> e.  The result is
 *    2  if x - y >  unit
 *    1  if x - y >  unit/2 (but not > unit)
 *    0  if |x - y| <= unit/2
 *   -1  if x - y < -unit/2 (but not < -unit)
 *   -2  if x - y < -unit
 */
static inline long
cmpe(unsigned long x, unsigned long y, unsigned long err, unsigned long e)
{
    long diff = x-y;
    long unit = ((x+y+err) >> e);
    long ret = (diff > unit) - (diff < -unit);
    unit >>= 1;
    return ret + (diff > unit) - (diff < -unit);
}
987  
988 
static unsigned long 
989 
find_breaks(struct vf_priv_s *p, struct frame_stats *s) 
990 
{ 
991 
struct frame_stats *ps = &p>stats[(p>inframes1) & 1]; 
992 
long notfilm = 5*p>in_inc  p>out_dec; 
993 
unsigned long n = s>num_blocks >> 8; 
994 
unsigned long sad_comb_cmp = cmpe(s>sad.temp, s>sad.noise, 512, 1); 
995 
unsigned long ret = 8; 
996  
997 
if (cmpe(s>sad.temp, s>sad.even, 512, 1) > 0) 
998 
mp_msg(MSGT_VFILTER, MSGL_WARN, 
999 
"@@@@@@@@ Bottomfirst field??? @@@@@@@@\n");

1000 
if (s>sad.temp > 1000 && s>sad.noise > 1000) 
1001 
return 3; 
1002 
if (s>interlaced_high >= 2*n && s>sad.temp > 256 && s>sad.noise > 256) 
1003 
return 3; 
1004 
if (s>high.noise > s>num_blocks/4 && s>sad.noise > 10000 && 
1005 
s>sad.noise > 2*s>sad.even && s>sad.noise > 2*ps>sad.odd) { 
1006 
// Midframe scene change

1007 
if (s>tiny.temp + s>interlaced_low < n 

1008 
s>low.temp + s>interlaced_high < n/4 

1009 
s>high.temp + s>interlaced_high < n/8 

1010 
s>sad.temp < 160)

1011 
return 1; 
1012 
return 3; 
1013 
} 
1014 
if (s>high.temp > s>num_blocks/4 && s>sad.temp > 10000 && 
1015 
s>sad.temp > 2*ps>sad.odd && s>sad.temp > 2*ps>sad.even) { 
1016 
// Start frame scene change

1017 
if (s>tiny.noise + s>interlaced_low < n 

1018 
s>low.noise + s>interlaced_high < n/4 

1019 
s>high.noise + s>interlaced_high < n/8 

1020 
s>sad.noise < 160)

1021 
return 2; 
1022 
return 3; 
1023 
} 
1024 
if (sad_comb_cmp == 2) 
1025 
return 2; 
1026 
if (sad_comb_cmp == 2) 
1027 
return 1; 
1028  
1029 
if (s>tiny.odd > 3*MAX(n,s>tiny.even) + s>interlaced_low) 
1030 
return 1; 
1031 
if (s>tiny.even > 3*MAX(n,s>tiny.odd)+s>interlaced_low && 
1032 
(!sad_comb_cmp  (s>low.noise <= n/4 && s>low.temp <= n/4))) 
1033 
return 4; 
1034  
1035 
if (s>sad.noise < 64 && s>sad.temp < 64 && 
1036 
s>low.noise <= n/2 && s>high.noise <= n/4 && 
1037 
s>low.temp <= n/2 && s>high.temp <= n/4) 
1038 
goto still;

1039  
1040 
if (s>tiny.temp > 3*MAX(n,s>tiny.noise) + s>interlaced_low) 
1041 
return 2; 
1042 
if (s>tiny.noise > 3*MAX(n,s>tiny.temp) + s>interlaced_low) 
1043 
return 1; 
1044  
1045 
if (s>low.odd > 3*MAX(n/4,s>low.even) + s>interlaced_high) 
1046 
return 1; 
1047 
if (s>low.even > 3*MAX(n/4,s>low.odd)+s>interlaced_high && 
1048 
s>sad.even > 2*s>sad.odd &&

1049 
(!sad_comb_cmp  (s>low.noise <= n/4 && s>low.temp <= n/4))) 
1050 
return 4; 
1051  
1052 
if (s>low.temp > 3*MAX(n/4,s>low.noise) + s>interlaced_high) 
1053 
return 2; 
1054 
if (s>low.noise > 3*MAX(n/4,s>low.temp) + s>interlaced_high) 
1055 
return 1; 
1056  
1057 
if (sad_comb_cmp == 1 && s>sad.noise < 64) 
1058 
return 2; 
1059 
if (sad_comb_cmp == 1 && s>sad.temp < 64) 
1060 
return 1; 
1061  
1062 
if (s>tiny.odd <= n  (s>tiny.noise <= n/2 && s>tiny.temp <= n/2)) { 
1063 
if (s>interlaced_low <= n) {

1064 
if (p>num_fields == 1) 
1065 
goto still;

1066 
if (s>tiny.even <= n  ps>tiny.noise <= n/2) 
1067 
/* Still frame */

1068 
goto still;

1069 
if (s>bigger.even >= 2*MAX(n,s>bigger.odd) + s>interlaced_low) 
1070 
return 4; 
1071 
if (s>low.even >= 2*n + s>interlaced_low) 
1072 
return 4; 
1073 
goto still;

1074 
} 
1075 
} 
1076 
if (s>low.odd <= n/4) { 
1077 
if (s>interlaced_high <= n/4) { 
1078 
if (p>num_fields == 1) 
1079 
goto still;

1080 
if (s>low.even <= n/4) 
1081 
/* Still frame */

1082 
goto still;

1083 
if (s>bigger.even >= 2*MAX(n/4,s>bigger.odd)+s>interlaced_high) 
1084 
return 4; 
1085 
if (s>low.even >= n/2 + s>interlaced_high) 
1086 
return 4; 
1087 
goto still;

1088 
} 
1089 
} 
1090 
if (s>bigger.temp > 2*MAX(n,s>bigger.noise) + s>interlaced_low) 
1091 
return 2; 
1092 
if (s>bigger.noise > 2*MAX(n,s>bigger.temp) + s>interlaced_low) 
1093 
return 1; 
1094 
if (s>bigger.temp > 2*MAX(n,s>bigger.noise) + s>interlaced_high) 
1095 
return 2; 
1096 
if (s>bigger.noise > 2*MAX(n,s>bigger.temp) + s>interlaced_high) 
1097 
return 1; 
1098 
if (s>twox.temp > 2*MAX(n,s>twox.noise) + s>interlaced_high) 
1099 
return 2; 
1100 
if (s>twox.noise > 2*MAX(n,s>twox.temp) + s>interlaced_high) 
1101 
return 1; 
1102 
if (s>bigger.even > 2*MAX(n,s>bigger.odd) + s>interlaced_low && 
1103 
s>bigger.temp < n && s>bigger.noise < n) 
1104 
return 4; 
1105 
if (s>interlaced_low > MIN(2*n, s>tiny.odd)) 
1106 
return 3; 
1107 
ret = 8 + (1 << (s>sad.temp > s>sad.noise)); 
1108 
still:

1109 
if (p>num_fields == 1 && p>prev_fields == 3 && notfilm >= 0 && 
1110 
(s>tiny.temp <= s>tiny.noise  s>sad.temp < s>sad.noise+16))

1111 
return 1; 
1112 
if (p>notout < p>num_fields && p>iosync > 2*p>in_inc && notfilm < 0) 
1113 
notfilm = 0;

1114 
if (p>num_fields < 2  
1115 
(p>num_fields == 2 && p>prev_fields == 2 && notfilm < 0)) 
1116 
return ret;

1117 
if (!notfilm && (p>prev_fields&~1) == 2) { 
1118 
if (p>prev_fields + p>num_fields == 5) { 
1119 
if (s>tiny.noise <= s>tiny.temp 

1120 
s>low.noise == 0  s>low.noise < s>low.temp 

1121 
s>sad.noise < s>sad.temp+16)

1122 
return 2; 
1123 
} 
1124 
if (p>prev_fields + p>num_fields == 4) { 
1125 
if (s>tiny.temp <= s>tiny.noise 

1126 
s>low.temp == 0  s>low.temp < s>low.noise 

1127 
s>sad.temp < s>sad.noise+16)

1128 
return 1; 
1129 
} 
1130 
} 
1131 
if (p>num_fields > 2 && 
1132 
ps>sad.noise > s>sad.noise && ps>sad.noise > s>sad.temp) 
1133 
return 4; 
1134 
return 2 >> (s>sad.noise > s>sad.temp); 
1135 
} 
1136  
1137 
/* Map a small integer to one display character: 0 -> ' ', 1..9 -> '1'..'9',
 * 10 and above -> 'a', 'b', ...  (X is evaluated more than once.) */
#define ITOC(X) (!(X) ? ' ' : (X) + ((X)>9 ? 'a'-10 : '0'))
1138  
1139 
/*
 * put_image callback (installed by vf_open): take one input frame, decide
 * which stored fields to show, and pass at most one output frame on.
 *
 * Steps, in order:
 *   1. Obtain the frame's planes: reuse the buffer handed out by
 *      get_image() when the frame was rendered directly, otherwise copy
 *      the cropped picture into one of the filter's own slots.
 *   2. Fill in the statistics slot for this frame (a canned value for
 *      repeat-first frames, otherwise diff_fields()) and classify the
 *      frame with find_breaks().
 *   3. Turn the classification into show_fields, a 4-bit mask (bits 0-1
 *      refer to the current frame's planes, bits 2-3 to the previous
 *      frame's), and into dropped_fields, which is only reported.
 *   4. Update the field counters and the p->iosync balance.
 *   5. If anything is to be shown, either export stored planes as they
 *      are or merge fields into a fresh image, and hand it downstream.
 *
 * Returns the result of vf_next_put_image() when a frame is output,
 * otherwise 0.  The pts argument is not used; output frames are passed
 * on with MP_NOPTS_VALUE.
 *
 * NOTE(review): this text was recovered from a copy in which '-' and '|'
 * characters had been dropped.  Operators were put back where the grammar
 * requires one or where the surrounding code leaves no doubt
 * (iosync -= out_dec, show_fields |= 8).  Two things could not be
 * recovered and must be checked against the upstream file:
 *   - the repeat-first marker character, written here as '|' (the
 *     original could equally have been '-'); it is only compared for
 *     equality inside this function;
 *   - a possible unary minus in front of the constants in the p->iosync
 *     comparisons and the dropped_fields assignments inside the switch.
 */
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
{
    mp_image_t *dmpi = NULL;
    struct vf_priv_s *p = vf->priv;
    unsigned char **planes, **old_planes;
    struct frame_stats *s  = &p->stats[p->inframes & 1];
    struct frame_stats *ps = &p->stats[(p->inframes-1) & 1];
    int swapped = 0;
    const int flags = mpi->fields;
    int breaks, prev;
    int show_fields = 0;
    int dropped_fields = 0;
    double start_time, diff_time;
    char prev_chflag = p->chflag;
    int keep_rate;

    if (!p->planes[0][0]) init(p, mpi);

    old_planes = p->old_planes;

    if ((mpi->flags & MP_IMGFLAG_DIRECT) && mpi->priv) {
        /* Frame was rendered straight into one of our slots. */
        planes = mpi->priv;
        mpi->priv = 0;
    } else {
        planes = p->planes[2 + (++p->temp_idx & 1)];
        my_memcpy_pic(planes[0],
                      mpi->planes[0] + p->crop_x + p->crop_y * mpi->stride[0],
                      p->w, p->h, p->stride, mpi->stride[0]);
        if (mpi->flags & MP_IMGFLAG_PLANAR) {
            my_memcpy_pic(planes[1],
                          mpi->planes[1] + p->crop_cx + p->crop_cy * mpi->stride[1],
                          p->cw, p->ch, p->chroma_stride, mpi->stride[1]);
            my_memcpy_pic(planes[2],
                          mpi->planes[2] + p->crop_cx + p->crop_cy * mpi->stride[2],
                          p->cw, p->ch, p->chroma_stride, mpi->stride[2]);
            p->num_copies++;
        }
    }

    p->old_planes = planes;
    p->chflag = ';';
    if (flags & MP_IMGFIELD_ORDERED) {
        swapped = !(flags & MP_IMGFIELD_TOP_FIRST);
        p->chflag = (flags & MP_IMGFIELD_REPEAT_FIRST ? '|' :
                     flags & MP_IMGFIELD_TOP_FIRST ? ':' : '.');
    }
    p->swapped = swapped;

    start_time = get_time();
    if (p->chflag == '|') {
        *s = ppzs;
        p->iosync += p->in_inc;
    } else if ((p->fast & 1) && prev_chflag == '|')
        *s = pprs;
    else
        diff_fields(p, s, old_planes, planes);
    diff_time = get_time();
    p->diff_time += diff_time - start_time;
    /* The very first frame has nothing to compare against. */
    breaks = p->inframes ? find_breaks(p, s) : 2;
    p->inframes++;
    keep_rate = 4*p->in_inc == p->out_dec;

    switch (breaks) {
    case 0:
    case 8:
    case 9:
    case 10:
        if (!keep_rate && p->notout < p->num_fields && p->iosync < 2*p->in_inc)
            break;
        if (p->notout < p->num_fields)
            dropped_fields = 2;
        /* fall through */
    case 4:
        if (keep_rate || p->iosync >= 2*p->in_inc)
            show_fields = (4<<p->num_fields)-1;
        break;
    case 3:
        if (keep_rate)
            show_fields = 2;
        else if (p->iosync > 0) {
            if (p->notout >= p->num_fields && p->iosync > 2*p->in_inc) {
                show_fields = 4; /* prev odd only */
                if (p->num_fields > 1)
                    show_fields |= 8; /* + prev even */
            } else {
                show_fields = 2; /* even only */
                if (p->notout >= p->num_fields)
                    dropped_fields += p->num_fields;
            }
        }
        break;
    case 2:
        if (p->iosync <= 3*p->in_inc) {
            if (p->notout >= p->num_fields)
                dropped_fields = p->num_fields;
            break;
        }
        if (p->num_fields == 1) {
            int prevbreak = ps->sad.noise >= 128;
            if (p->iosync < 4*p->in_inc) {
                show_fields = 3;
                dropped_fields = prevbreak;
            } else {
                show_fields = 4 | (!prevbreak << 3);
                if (p->notout < 1 + p->prev_fields)
                    dropped_fields = !prevbreak;
            }
            break;
        }
        /* fall through */
    default:
        if (keep_rate)
            show_fields = 3 << (breaks & 1);
        else if (p->notout >= p->num_fields &&
                 p->iosync >= (breaks == 1 ? p->in_inc :
                               p->in_inc << (p->num_fields == 1))) {
            show_fields = (1 << (2 + p->num_fields)) - (1<<breaks);
        } else {
            if (p->notout >= p->num_fields)
                dropped_fields += p->num_fields + 2 - breaks;
            if (breaks == 1) {
                if (p->iosync >= 4*p->in_inc)
                    show_fields = 6;
            } else if (p->iosync > 3*p->in_inc)
                show_fields = 3;  /* odd+even */
        }
        break;
    }

    show_fields &= 15;
    prev = p->prev_fields;
    if (breaks < 8) {
        if (p->num_fields == 1)
            breaks &= ~4;
        if (breaks)
            p->num_breaks++;
        if (breaks == 3)
            p->prev_fields = p->num_fields = 1;
        else if (breaks) {
            p->prev_fields = p->num_fields + (breaks==1) - (breaks==4);
            p->num_fields = breaks - (breaks == 4) + (p->chflag == '|');
        } else
            p->num_fields += 2;
    } else
        p->num_fields += 2;

    /* Every input frame adds to the balance; every output frame (below)
     * takes out_dec off it again. */
    p->iosync += 4 * p->in_inc;
    if (p->chflag == '|')
        p->iosync += p->in_inc;

    if (show_fields) {
        p->iosync -= p->out_dec;
        p->notout = !(show_fields & 1) + !(show_fields & 3);
        if (((show_fields &  3) ==  3 &&
             (s->low.noise + s->interlaced_low < (s->num_blocks>>8) ||
              s->sad.noise < 160)) ||
            ((show_fields & 12) == 12 &&
             (ps->low.noise + ps->interlaced_low < (s->num_blocks>>8) ||
              ps->sad.noise < 160))) {
            /* Both fields of one stored frame are wanted and they match
             * well: export that frame's planes without copying. */
            p->export_count++;
            dmpi = vf_get_image(vf->next, mpi->imgfmt, MP_IMGTYPE_EXPORT,
                                MP_IMGFLAG_PRESERVE|MP_IMGFLAG_READABLE,
                                p->w, p->h);
            if ((show_fields & 3) != 3) planes = old_planes;
            dmpi->planes[0] = planes[0];
            dmpi->stride[0] = p->stride;
            dmpi->width = mpi->width;
            if (mpi->flags & MP_IMGFLAG_PLANAR) {
                dmpi->planes[1] = planes[1];
                dmpi->planes[2] = planes[2];
                dmpi->stride[1] = p->chroma_stride;
                dmpi->stride[2] = p->chroma_stride;
            }
        } else {
            p->merge_count++;
            dmpi = vf_get_image(vf->next, mpi->imgfmt,
                                MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
                                p->w, p->h);
            copy_merge_fields(p, dmpi, old_planes, planes, show_fields);
        }
        p->outframes++;
    } else
        p->notout += 2;

    if (p->verbose)
        mp_msg(MSGT_VFILTER, MSGL_INFO, "%lu %lu: %x %c %c %lu%s%s%c%s\n",
               p->inframes, p->outframes,
               breaks, breaks<8 && breaks>0 ? (int) p->prev_fields+'0' : ' ',
               ITOC(show_fields),
               p->num_breaks, 5*p->in_inc == p->out_dec && breaks<8 &&
               breaks>0 && ((prev&~1)!=2 || prev+p->prev_fields!=5) ?
               " ######## bad telecine ########" : "",
               dropped_fields ? " ======== dropped ":"", ITOC(dropped_fields),
               !show_fields || (show_fields & (show_fields-1)) ?
               "" : " @@@@@@@@@@@@@@@@@");

    p->merge_time += get_time() - diff_time;
    /* dmpi is only set, and only used, when show_fields is non-zero. */
    return show_fields ? vf_next_put_image(vf, dmpi, MP_NOPTS_VALUE) : 0;
}
1336  
1337 
static int query_format(struct vf_instance *vf, unsigned int fmt) 
1338 
{ 
1339 
/* FIXME  support more formats */

1340 
switch (fmt) {

1341 
case IMGFMT_YV12:

1342 
case IMGFMT_IYUV:

1343 
case IMGFMT_I420:

1344 
case IMGFMT_411P:

1345 
case IMGFMT_422P:

1346 
case IMGFMT_444P:

1347 
return vf_next_query_format(vf, fmt);

1348 
} 
1349 
return 0; 
1350 
} 
1351  
1352 
static int config(struct vf_instance *vf, 
1353 
int width, int height, int d_width, int d_height, 
1354 
unsigned int flags, unsigned int outfmt) 
1355 
{ 
1356 
unsigned long cxm = 0; 
1357 
unsigned long cym = 0; 
1358 
struct vf_priv_s *p = vf>priv;

1359 
// rounding:

1360 
if(!IMGFMT_IS_RGB(outfmt) && !IMGFMT_IS_BGR(outfmt)){

1361 
switch(outfmt){

1362 
case IMGFMT_444P:

1363 
case IMGFMT_Y800:

1364 
case IMGFMT_Y8:

1365 
break;

1366 
case IMGFMT_YVU9:

1367 
case IMGFMT_IF09:

1368 
cym = 3;

1369 
case IMGFMT_411P:

1370 
cxm = 3;

1371 
break;

1372 
case IMGFMT_YV12:

1373 
case IMGFMT_I420:

1374 
case IMGFMT_IYUV:

1375 
cym = 1;

1376 
default:

1377 
cxm = 1;

1378 
} 
1379 
} 
1380 
p>chroma_swapped = !!(p>crop_y & (cym+1));

1381 
if (p>w) p>w += p>crop_x & cxm;

1382 
if (p>h) p>h += p>crop_y & cym;

1383 
p>crop_x &= ~cxm; 
1384 
p>crop_y &= ~cym; 
1385 
if (!p>w  p>w > width ) p>w = width;

1386 
if (!p>h  p>h > height) p>h = height;

1387 
if (p>crop_x + p>w > width ) p>crop_x = 0; 
1388 
if (p>crop_y + p>h > height) p>crop_y = 0; 
1389  
1390 
if(!opt_screen_size_x && !opt_screen_size_y){

1391 
d_width = d_width * p>w/width; 
1392 
d_height = d_height * p>h/height; 
1393 
} 
1394 
return vf_next_config(vf, p>w, p>h, d_width, d_height, flags, outfmt);

1395 
} 
1396  
1397 
static void uninit(struct vf_instance *vf) 
1398 
{ 
1399 
struct vf_priv_s *p = vf>priv;

1400 
mp_msg(MSGT_VFILTER, MSGL_INFO, "diff_time: %.3f, merge_time: %.3f, "

1401 
"export: %lu, merge: %lu, copy: %lu\n", p>diff_time, p>merge_time,

1402 
p>export_count, p>merge_count, p>num_copies); 
1403 
free(p>memory_allocated); 
1404 
free(p); 
1405 
} 
1406  
1407 
/*
 * Filter entry point: allocate the private state, install the callbacks,
 * set the defaults and apply the user's suboptions.
 *
 * Returns 1 on success.  Returns 0 if the private state cannot be
 * allocated, if parse_args() reports an unknown suboption, or if the
 * options ask for an output rate above the input rate
 * (out_dec < in_inc).
 */
static int vf_open(vf_instance_t *vf, char *args)
{
    struct vf_priv_s *p;

    /* Allocate first so that a failure leaves vf without callbacks that
     * would dereference a NULL priv. */
    vf->priv = p = calloc(1, sizeof(struct vf_priv_s));
    if (!p)
        return 0;

    vf->get_image = get_image;
    vf->put_image = put_image;
    vf->config = config;
    vf->query_format = query_format;
    vf->uninit = uninit;
    vf->default_reqs = VFCAP_ACCEPT_STRIDE;
    p->out_dec = 5;
    p->in_inc = 4;
    p->thres.noise = 128;
    p->thres.even  = 128;
    p->sad_thres = 64;
    p->dint_thres = 4;
    p->luma_only = 0;
    p->fast = 3;
    p->mmx2 = gCpuCaps.hasMMX2 ? 1 : gCpuCaps.has3DNow ? 2 : 0;
    if (args) {
        const char *args_remain = parse_args(p, args);
        if (args_remain) {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "filmdint: unknown suboption: %s\n", args_remain);
            return 0;
        }
        if (p->out_dec < p->in_inc) {
            mp_msg(MSGT_VFILTER, MSGL_FATAL,
                   "filmdint: increasing the frame rate is not supported\n");
            return 0;
        }
    }
    /* Clamp mmx2 to what was compiled in; out-of-range values disable it. */
    if (p->mmx2 > 2)
        p->mmx2 = 0;
#if !HAVE_MMX
    p->mmx2 = 0;
#endif
#if !HAVE_AMD3DNOW
    p->mmx2 &= 1;
#endif
    p->thres.odd  = p->thres.even;
    p->thres.temp = p->thres.noise;
    p->diff_time = 0;
    p->merge_time = 0;
    return 1;
}
1453  
1454 
const vf_info_t vf_info_filmdint = {

1455 
"Advanced inverse telecine filer",

1456 
"filmdint",

1457 
"Zoltan Hidvegi",

1458 
"",

1459 
vf_open, 
1460 
NULL

1461 
}; 