Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est.c @ 0d21a846

History | View | Annotate | Download (56.2 KB)

1
/*
2
 * Motion estimation 
3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002 Michael Niedermayer
5
 * 
6
 *
7
 * This library is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2 of the License, or (at your option) any later version.
11
 *
12
 * This library is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with this library; if not, write to the Free Software
19
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20
 *
21
 * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
22
 */
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include "avcodec.h"
26
#include "dsputil.h"
27
#include "mpegvideo.h"
28

    
29
/* Square of a value; note that the argument is evaluated twice. */
#define SQ(a)             ((a) * (a))
#define INTER_BIAS        257

/*
 * Symbolic names for the rows of the P[10][2] predictor array that is handed
 * to the EPZS searches.  Every row is one candidate motion vector {x, y}.
 */
#define P_LAST            P[0]
#define P_LEFT            P[1]
#define P_TOP             P[2]
#define P_TOPRIGHT        P[3]
#define P_MEDIAN          P[4]
#define P_LAST_LEFT       P[5]
#define P_LAST_RIGHT      P[6]
#define P_LAST_TOP        P[7]
#define P_LAST_BOTTOM     P[8]
#define P_MV1             P[9]
42

    
43

    
44
/*
 * Sum of the 256 pixel values of a 16x16 block.
 *
 * pix       - top-left pixel of the block
 * line_size - offset, in elements of pix, between the starts of two
 *             consecutive rows
 */
static int pix_sum(UINT8 * pix, int line_size)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
65

    
66
/*
 * Sum of the absolute differences between each pixel of a 16x16 block and
 * the given mean value.
 *
 * pix       - top-left pixel of the block
 * line_size - offset, in elements of pix, between the starts of two
 *             consecutive rows
 * mean      - value every pixel is compared against
 */
static int pix_dev(UINT8 * pix, int line_size, int mean)
{
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += ABS(pix[col]-mean);
        pix += line_size;
    }
    return total;
}
87

    
88
/*
 * Sum of the squared pixel values of a 16x16 block.  The squares are looked
 * up in squareTbl, offset by 256 so that index 0 maps to table entry 256.
 *
 * pix       - top-left pixel of the block
 * line_size - offset, in elements of pix, between the starts of two
 *             consecutive rows
 */
static int pix_norm1(UINT8 * pix, int line_size)
{
    UINT32 *sq = squareTbl + 256;
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += sq[pix[col]];
        pix += line_size;
    }
    return total;
}
110

    
111
/*
 * Sum of squared differences between two 16x16 blocks that share the same
 * row stride.  The (possibly negative) pixel difference indexes squareTbl
 * relative to its entry 256.
 *
 * pix1, pix2 - top-left pixels of the two blocks
 * line_size  - offset, in elements, between the starts of two consecutive
 *              rows of either block
 */
static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
{
    UINT32 *sq = squareTbl + 256;
    int total = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += sq[pix1[col] - pix2[col]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return total;
}
135

    
136
/*
 * "Search" that performs no estimation at all: it simply reports the pixel
 * position of the current macroblock (16 times its macroblock coordinates).
 */
static void no_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr)
{
    const int mb_pixel_x = 16 * s->mb_x;
    const int mb_pixel_y = 16 * s->mb_y;

    *mx_ptr = mb_pixel_x;
    *my_ptr = mb_pixel_y;
}
142

    
143
/*
 * Exhaustive full-pel search.
 *
 * Every position of a window of +/-(range-1) pixels around the current
 * macroblock, clipped to [xmin,xmax] x [ymin,ymax], is scored with
 * pix_abs16x16() against the source macroblock.  Among equal scores the
 * position with the smaller |dx|+|dy| distance to the macroblock wins.
 *
 * The winning position is stored in *mx_ptr / *my_ptr as picture coordinates.
 * Returns the winning score; if the clipped window is empty, (0,0) is
 * reported and 0x7fffffff is returned.
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = 16 * s->mb_x;
    const int yy = 16 * s->mb_y;
    /* we lose one pixel on each side to avoid boundary problems with half pixel prediction */
    int x1 = xx - range + 1;
    int x2 = xx + range - 1;
    int y1 = yy - range + 1;
    int y2 = yy + range - 1;
    int mx = 0, my = 0;
    int dmin = 0x7fffffff;
    int x, y;
    UINT8 *pix;

    if (x1 < xmin) x1 = xmin;
    if (x2 > xmax) x2 = xmax;
    if (y1 < ymin) y1 = ymin;
    if (y2 > ymax) y2 = ymax;

    pix = s->new_picture[0] + (yy * s->linesize) + xx;

    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            const int d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
                                       s->linesize);
            int better = d < dmin;

            /* equal score: prefer the candidate closer to the macroblock */
            if (!better && d == dmin)
                better = (abs(x - xx) + abs(y - yy)) <
                         (abs(mx - xx) + abs(my - yy));

            if (better) {
                dmin = d;
                mx = x;
                my = y;
            }
        }
    }

    *mx_ptr = mx;
    *my_ptr = my;

#if 0
    if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
        *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
        fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
    }
#endif
    return dmin;
}
195

    
196

    
197
/*
 * Logarithmic full-pel search.
 *
 * Starting with a window of +/-range around the current macroblock, the
 * window is sampled on a grid with a step of `range`; afterwards the range is
 * halved and the window is re-centred on the best position found so far.
 * This repeats while the range is at least 1.  All windows are clipped to
 * [xmin,xmax] x [ymin,ymax]; among equal scores the position closer to the
 * macroblock (|dx|+|dy|) wins.
 *
 * The best position (picture coordinates) is stored in *mx_ptr / *my_ptr and
 * its pix_abs16x16() score is returned.
 * NOTE(review): range is used as the loop increment, so the caller presumably
 * always passes range >= 1 - confirm.
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = s->mb_x << 4;
    const int yy = s->mb_y << 4;
    /* initial window: left, right, upper and lower limit */
    int x1 = (xx - range < xmin) ? xmin : xx - range;
    int x2 = (xx + range > xmax) ? xmax : xx + range;
    int y1 = (yy - range < ymin) ? ymin : yy - range;
    int y2 = (yy + range > ymax) ? ymax : yy + range;
    int mx = 0, my = 0;
    int dmin = 0x7fffffff;
    int x, y, d;
    UINT8 *pix;

    pix = s->new_picture[0] + (yy * s->linesize) + xx;

    do {
        for (y = y1; y <= y2; y += range) {
            for (x = x1; x <= x2; x += range) {
                d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        /* halve the step and re-centre the window on the current best */
        range >>= 1;

        x1 = (mx - range < xmin) ? xmin : mx - range;
        x2 = (mx + range > xmax) ? xmax : mx + range;
        y1 = (my - range < ymin) ? ymin : my - range;
        y2 = (my + range > ymax) ? ymax : my + range;
    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "log       - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}
272

    
273
/*
 * Full-pel search that treats the two axes separately.
 *
 * In every round the horizontal coordinate is searched along the row of the
 * current centre and the vertical coordinate along the column of the current
 * centre, both with a step of `range`.  Then the range is halved, the centre
 * moves to the best (mx,my) and the window is re-clipped to
 * [xmin,xmax] x [ymin,ymax].  Rounds repeat while the range is at least 1.
 *
 * The resulting position (picture coordinates) is stored in *mx_ptr /
 * *my_ptr.  The return value is the best score of the last vertical pass.
 * NOTE(review): range is used as the loop increment, so the caller presumably
 * always passes range >= 1 - confirm.
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int xx = s->mb_x << 4;
    const int yy = s->mb_y << 4;
    /* initial window: left, right, upper and lower limit */
    int x1 = (xx - range < xmin) ? xmin : xx - range;
    int x2 = (xx + range > xmax) ? xmax : xx + range;
    int y1 = (yy - range < ymin) ? ymin : yy - range;
    int y2 = (yy + range > ymax) ? ymax : yy + range;
    int x = xx, y = yy;         /* centre of the current round */
    int mx = 0, my = 0;
    int dminx, dminy;
    int cand, d;
    UINT8 *pix;

    pix = s->new_picture[0] + (yy * s->linesize) + xx;

    do {
        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal pass along row y */
        for (cand = x1; cand <= x2; cand += range) {
            d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + cand, s->linesize);
            if (d < dminx || (d == dminx && (abs(cand - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = cand;
            }
        }

        /* vertical pass along column x (still the centre of this round) */
        for (cand = y1; cand <= y2; cand += range) {
            d = pix_abs16x16(pix, ref_picture + (cand * s->linesize) + x, s->linesize);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(cand - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = cand;
            }
        }

        range >>= 1;

        /* move the centre and rebuild the clipped window around it */
        x = mx;
        y = my;
        x1 = (mx - range < xmin) ? xmin : mx - range;
        x2 = (mx + range > xmax) ? xmax : mx + range;
        y1 = (my - range < ymin) ? ymin : my - range;
        y2 = (my + range > ymax) ? ymax : my + range;
    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "phods     - MX: %d\tMY: %d\n", mx, my);
#endif

    /* full-pel result; no half pixel refinement is done here */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
363

    
364

    
365
#define Z_THRESHOLD 256

/*
 * Score the full-pel candidate (x,y) of a 16x16 block unless the map already
 * holds an entry for it from the current map generation.
 *
 * The score is pix_abs16x16() plus the mv_penalty cost of both components
 * (relative to pred_x/pred_y) times quant.  If it beats dmin, then dmin and
 * best[] are updated.  The candidate is then recorded in map/score_map.
 *
 * Expands to a braced block (use without a trailing semicolon) and relies on
 * these names being in scope: d, dmin, best, map, score_map, map_generation,
 * new_pic, old_pic, pic_stride, mv_penalty, shift, pred_x, pred_y, quant.
 * The arguments are evaluated several times.
 */
#define CHECK_MV(x,y)\
{\
    const int mv_key=  ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int mv_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[mv_slot]!=mv_key){\
        d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
        map[mv_slot]= mv_key;\
        score_map[mv_slot]= d;\
    }\
}
379

    
380
/*
 * Same idea as CHECK_MV, but the comparison function is the pix_abs that is
 * in scope at the point of use, and a winning candidate additionally stores
 * new_dir in next_dir.
 *
 * Expands to a braced block (use without a trailing semicolon) and relies on
 * these names being in scope: d, dmin, best, next_dir, map, score_map,
 * map_generation, pix_abs, new_pic, old_pic, pic_stride, mv_penalty, shift,
 * pred_x, pred_y, quant.  The arguments are evaluated several times.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const int mv_key=  ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int mv_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[mv_slot]!=mv_key){\
        d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
        map[mv_slot]= mv_key;\
        score_map[mv_slot]= d;\
    }\
}
397

    
398
/*
 * 8x8 variant of CHECK_MV: the candidate (x,y) is scored with pix_abs8x8()
 * instead of pix_abs16x16(); everything else is identical.
 *
 * Expands to a braced block (use without a trailing semicolon) and relies on
 * these names being in scope: d, dmin, best, map, score_map, map_generation,
 * new_pic, old_pic, pic_stride, mv_penalty, shift, pred_x, pred_y, quant.
 * The arguments are evaluated several times.
 */
#define CHECK_MV4(x,y)\
{\
    const int mv_key=  ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int mv_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[mv_slot]!=mv_key){\
        d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
        d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
        map[mv_slot]= mv_key;\
        score_map[mv_slot]= d;\
    }\
}
410

    
411
/*
 * Debug helper: prints a diagnostic for every limit that the vector (x,y)
 * violates.  The limits xmin/xmax/ymin/ymax are shifted left by S before the
 * comparison; v is appended (stringized) to the message as a tag.
 *
 * Requires xmin, ymin, xmax, ymax and s (with mb_x / mb_y) in scope.
 *
 * The body is wrapped in braces so that the macro behaves as one statement
 * (e.g. under an `if`), and the last line carries no continuation backslash,
 * so the source line following the macro is not absorbed into it.
 */
#define check(x,y,S,v)\
{\
    if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
    if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
    if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
    if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
}
416

    
417

    
418
static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
419
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
420
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
421
                                       int xmin, int ymin, int xmax, int ymax, int shift,
422
                                       uint32_t *map, uint16_t *score_map, int map_generation,
423
                                       op_pixels_abs_func pix_abs)
424
{
425
    int next_dir=-1;
426

    
427
    for(;;){
428
        int d;
429
        const int dir= next_dir;
430
        const int x= best[0];
431
        const int y= best[1];
432
        next_dir=-1;
433

    
434
//printf("%d", dir);
435
        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
436
        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
437
        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
438
        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
439

    
440
        if(next_dir==-1){
441
            return dmin;
442
        }
443
    }
444

    
445
/*    for(;;){
446
        int d;
447
        const int x= best[0];
448
        const int y= best[1];
449
        const int last_min=dmin;
450
        if(x>xmin) CHECK_MV(x-1, y  )
451
        if(y>xmin) CHECK_MV(x  , y-1)
452
        if(x<xmax) CHECK_MV(x+1, y  )
453
        if(y<xmax) CHECK_MV(x  , y+1)
454
        if(x>xmin && y>ymin) CHECK_MV(x-1, y-1)
455
        if(x>xmin && y<ymax) CHECK_MV(x-1, y+1)
456
        if(x<xmax && y>ymin) CHECK_MV(x+1, y-1)
457
        if(x<xmax && y<ymax) CHECK_MV(x+1, y+1)
458
        if(x-1>xmin) CHECK_MV(x-2, y  )
459
        if(y-1>xmin) CHECK_MV(x  , y-2)
460
        if(x+1<xmax) CHECK_MV(x+2, y  )
461
        if(y+1<xmax) CHECK_MV(x  , y+2)
462
        if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2)
463
        if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2)
464
        if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2)
465
        if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2)
466
        if(dmin==last_min) return dmin;
467
    }
468
    */
469
}
470

    
471
#if 1
472
#define SNAKE_1 3
473
#define SNAKE_2 2
474
#else
475
#define SNAKE_1 7
476
#define SNAKE_2 3
477
#endif
478
static inline int snake_search(MpegEncContext * s, int *best, int dmin,
479
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
480
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
481
                                       int xmin, int ymin, int xmax, int ymax, int shift,
482
                                       uint32_t *map, uint16_t *score_map,int map_generation,
483
                                       op_pixels_abs_func pix_abs)
484
{
485
    int dir=0;
486
    int c=1;
487
    static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1};
488
    static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1};
489
    int fails=0;
490
    int last_d[2]={dmin, dmin};
491

    
492
/*static int good=0;
493
static int bad=0;
494
static int point=0;
495

496
point++;
497
if(256*256*256*64%point==0)
498
{
499
    printf("%d %d %d\n", good, bad, point);
500
}*/
501

    
502
    for(;;){
503
        int x= best[0];
504
        int y= best[1];
505
        int d;
506
        x+=x_dir[dir];
507
        y+=y_dir[dir];
508
        if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
509
            const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;
510
            const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
511
            if(map[index]!=key){
512
                d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
513
                d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
514
                map[index]=key;
515
                score_map[index]=d;
516
            }else
517
                d= dmin+1;
518
        }else{
519
            d = dmin + 10000; //FIXME smarter boundary handling
520
        }
521
        if(d<dmin){
522
            best[0]=x;
523
            best[1]=y;
524
            dmin=d;
525

    
526
            if(last_d[1] - last_d[0] > last_d[0] - d) c= -c;
527
            dir+=c;
528

    
529
            fails=0;
530
//good++;
531
            last_d[1]=last_d[0];
532
            last_d[0]=d;
533
        }else{
534
//bad++;
535
            if(fails){
536
                if(fails>=SNAKE_1+1) return dmin;
537
            }else{
538
                if(dir&1) dir-= c*3;
539
                else      c= -c;
540
//                c= -c;
541
            }
542
            dir+=c*SNAKE_2;
543
            fails++;
544
        }
545
        dir&=7;
546
    }
547
}
548

    
549
static inline int cross_search(MpegEncContext * s, int *best, int dmin,
550
                                       UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
551
                                       int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
552
                                       int xmin, int ymin, int xmax, int ymax, int shift,
553
                                       uint32_t *map, uint16_t *score_map,int map_generation,
554
                                       op_pixels_abs_func pix_abs)
555
{
556
    static int x_dir[4]= {-1, 0, 1, 0};
557
    static int y_dir[4]= { 0,-1, 0, 1};
558
    int improvement[2]={100000, 100000};
559
    int dirs[2]={2, 3};
560
    int dir;
561
    int last_dir= -1;
562
    
563
    for(;;){
564
        dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ];
565
        if(improvement[dir&1]==-1) return dmin;
566
        
567
        {
568
            const int x= best[0] + x_dir[dir];
569
            const int y= best[1] + y_dir[dir];
570
            const int key= (y<<ME_MAP_MV_BITS) + x + map_generation;
571
            const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1);
572
            int d;
573
            if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
574
                if(map[index]!=key){
575
                    d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride);
576
                    d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant;
577
                    map[index]=key;
578
                    score_map[index]=d;
579
                    if(d<dmin){
580
                        improvement[dir&1]= dmin-d;
581
                        improvement[(dir&1)^1]++;
582
                        dmin=d;
583
                        best[0]= x;
584
                        best[1]= y;
585
                        last_dir=dir;
586
                        continue;
587
                    }
588
                }else{
589
                    d= score_map[index];
590
                }
591
            }else{
592
                d= dmin + 1000; //FIXME is this a good idea?
593
            }
594
            /* evaluated point was cached or checked and worse */
595

    
596
            if(last_dir==dir){
597
                improvement[dir&1]= -1;
598
            }else{
599
                improvement[dir&1]= d-dmin;
600
                last_dir= dirs[dir&1]= dir^2;
601
            }
602
        }
603
    }
604
}
605

    
606
/*
 * Start a new map generation, which makes all entries written to s->me_map
 * under earlier generations compare unequal to any new key.
 *
 * The generation counter is advanced by 1<<(ME_MAP_MV_BITS*2).  If it becomes
 * zero, the map is cleared and the counter is set back to one increment.
 * Returns the new generation value.
 */
static inline int update_map_generation(MpegEncContext * s)
{
    const int generation_step = 1<<(ME_MAP_MV_BITS*2);

    s->me_map_generation += generation_step;
    if (s->me_map_generation == 0) {
        memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
        s->me_map_generation = generation_step;
    }
    return s->me_map_generation;
}
615

    
616
/*
 * Predictive full-pel search for a 16x16 macroblock.
 *
 * The zero vector is scored first (without a vector cost).  Then a set of
 * predictor vectors from P[] (shifted right by `shift`) is tried with
 * CHECK_MV, where the set depends on whether the macroblock is on the first
 * line / first slice line and on how good the current score is.  The best
 * candidate is finally refined with small_diamond_search() when
 * s->me_method is ME_EPZS and with cross_search() otherwise.
 *
 * Outside the first line, if the zero vector scores below 256 and the left,
 * top and top-right predictors are all zero, the search is cut short: (0,0)
 * is reported and s->skip_me is set.
 *
 * The resulting vector is stored in *mx_ptr / *my_ptr as an offset relative
 * to the macroblock position; its score is returned.
 *
 * The locals d, dmin, best, map, score_map, map_generation, new_pic, old_pic,
 * pic_stride, mv_penalty, shift and quant are referenced by CHECK_MV and must
 * keep their names.
 */
static int epzs_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr,
                             int P[10][2], int pred_x, int pred_y,
                             int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
{
    const int pic_stride= s->linesize;
    const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16;
    const int shift= 1+s->quarter_sample;
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    int quant= s->qscale; // qscale of the prev frame
    uint32_t *map= s->me_map;
    uint16_t *score_map= s->me_score_map;
    UINT8 *new_pic= s->new_picture[0] + pic_xy;
    UINT8 *old_pic= ref_picture + pic_xy;
    int best[2]= {0, 0};
    int map_generation;
    int d, dmin;

    map_generation= update_map_generation(s);

    /* the zero vector is the starting point and occupies map slot 0 */
    dmin= pix_abs16x16(new_pic, old_pic, pic_stride);
    map[0]= map_generation;
    score_map[0]= dmin;

    if (s->mb_y != 0 && !s->first_slice_line) {
        if (dmin < 256 && ( P_LEFT[0]    |P_LEFT[1]
                           |P_TOP[0]     |P_TOP[1]
                           |P_TOPRIGHT[0]|P_TOPRIGHT[1]) == 0) {
            *mx_ptr= 0;
            *my_ptr= 0;
            s->skip_me= 1;
            return dmin;
        }
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        if (dmin > 256*2) {
            CHECK_MV(P_LAST[0]    >>shift, P_LAST[1]    >>shift)
            CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
            CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        }
    } else {
        /* first line */
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
    }

    if (dmin > 256*4) {
        CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
        CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
    }
#if 0 //disabled: this only slows things down
    if(dmin>512*3){
        int step;
        dmin= score_map[0];
        best[0]= best[1]=0;
        for(step=128; step>0; step>>=1){
            const int step2= step;
            int y;
            for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
                int x;
                if(y<ymin || y>ymax) continue;

                for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
                    if(x<xmin || x>xmax) continue;
                    if(x==best[0] && y==best[1]) continue;
                    CHECK_MV(x,y)
                }
            }
        }
    }
#endif

    /* local refinement around the best predictor */
    if (s->me_method == ME_EPZS) {
        dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
                                   shift, map, score_map, map_generation, pix_abs16x16);
    } else {
        dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
                           pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
                           shift, map, score_map, map_generation, pix_abs16x16);
    }

    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}
704

    
705
/*
 * Predictive full-pel search for one 8x8 block of the current macroblock.
 *
 * `block` selects the 8x8 block: bit 0 is its column and bit 1 its row
 * inside the macroblock.  Predictor vectors from P[] (shifted right by
 * `shift`) are tried with CHECK_MV4, starting from a score of 1000000, and
 * the best one is refined with small_diamond_search() when s->me_method is
 * ME_EPZS and with cross_search() otherwise, both using pix_abs8x8.
 *
 * The resulting vector is stored in *mx_ptr / *my_ptr as an offset relative
 * to the block position; its score is returned.
 *
 * The locals d, dmin, best, map, score_map, map_generation, new_pic, old_pic,
 * pic_stride, mv_penalty, shift and quant are referenced by CHECK_MV4 and
 * must keep their names.
 */
static int epzs_motion_search4(MpegEncContext * s, int block,
                             int *mx_ptr, int *my_ptr,
                             int P[10][2], int pred_x, int pred_y,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    const int pic_stride= s->linesize;
    const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8;
    const int shift= 1+s->quarter_sample;
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    int quant= s->qscale; // qscale of the prev frame
    uint32_t *map= s->me_map;
    uint16_t *score_map= s->me_score_map;
    UINT8 *new_pic= s->new_picture[0] + pic_xy;
    UINT8 *old_pic= ref_picture + pic_xy;
    int best[2]= {0, 0};
    int map_generation;
    int d;
    int dmin= 1000000;

    map_generation= update_map_generation(s);

    if (block >= 2 || (s->mb_y != 0 && !s->first_slice_line)) {
        CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        if (dmin > 64*2) {
            CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
            CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
            CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift)
            CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
            CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
        }
    } else {
        /* upper blocks of a macroblock on the first line */
        CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
        CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }

    if (dmin > 64*4) {
        CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
        CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
    }

    /* local refinement around the best predictor */
    if (s->me_method == ME_EPZS) {
        dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
                                   pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
                                   shift, map, score_map, map_generation, pix_abs8x8);
    } else {
        dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
                           pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
                           shift, map, score_map, map_generation, pix_abs8x8);
    }

    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}
765

    
766
/*
 * Score the half-pel offset (x,y) around the current full-pel position.
 *
 * `suffix` selects the comparison function pix_abs_<suffix> that is in scope
 * at the point of use (x2, y2 or xy2).  The score is that comparison at
 * ptr + (x>>1) plus the mv_penalty cost at pen_x + x and pen_y + y times
 * quant.  If it beats dminh, then dminh, dx and dy are updated.
 *
 * Expands to a braced block (use without a trailing semicolon) and relies on
 * these names being in scope: d, dminh, dx, dy, pix, ptr, s, mv_penalty,
 * pen_x, pen_y, quant.
 */
#define CHECK_HALF_MV(suffix, x, y) \
{\
    d  = pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*quant;\
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
}
772

    
773
    
774
/* The idea would be to make half pel ME after Inter/Intra decision to 
775
   save time. */
776
/*
 * Half-pel refinement of a full-pel vector by testing all eight half-pel
 * neighbours.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector and dmin its score; n
 * selects the 8x8 block inside the macroblock (bit 0: column, bit 1: row).
 * On return the vector is in half-pel units (twice the full-pel value plus
 * the best offset in -1..+1 per component) and the best score is returned.
 *
 * No refinement is done when the vector lies on one of the limits
 * xmin/xmax/ymin/ymax; it is then only rescaled to half-pel units.
 * If s->skip_me is set, (0,0) is reported and dmin is returned unchanged.
 *
 * pix_abs_x2 / pix_abs_y2 / pix_abs_xy2 are used through CHECK_HALF_MV.
 */
static inline int halfpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int xmin, int ymin, int xmax, int ymax,
                                  int pred_x, int pred_y, uint8_t *ref_picture,
                                  op_pixels_abs_func pix_abs_x2,
                                  op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
{
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    const int quant= s->qscale;
    int mx, my, xx, yy, dminh;
    UINT8 *pix, *ptr;

    /* the full-pel search decided to skip this macroblock */
    if(s->skip_me){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    xx = 16 * s->mb_x + 8*(n&1);
    yy = 16 * s->mb_y + 8*(n>>1);
    pix =  s->new_picture[0] + (yy * s->linesize) + xx;

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;

        mx<<=1;
        my<<=1;

        /* base indices into mv_penalty for the centre position */
        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* upper row of neighbours: step one line up in the reference */
        ptr-= s->linesize;
        CHECK_HALF_MV(xy2, -1, -1)
        CHECK_HALF_MV(y2 ,  0, -1)
        CHECK_HALF_MV(xy2, +1, -1)

        /* middle and lower row of neighbours */
        ptr+= s->linesize;
        CHECK_HALF_MV(x2 , -1,  0)
        CHECK_HALF_MV(x2 , +1,  0)
        CHECK_HALF_MV(xy2, -1, +1)
        CHECK_HALF_MV(y2 ,  0, +1)
        CHECK_HALF_MV(xy2, +1, +1)

        mx+=dx;
        my+=dy;
    }else{
        mx<<=1;
        my<<=1;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}
838

    
839
/*
 * Half-pel refinement that tests only a subset of the eight half-pel
 * neighbours.
 *
 * The score_map entries of the top (t), left (l), right (r) and bottom (b)
 * full-pel neighbours of the current vector are compared to decide which
 * half-pel positions are worth testing.
 * NOTE(review): the entries are read without checking the map generation -
 * presumably they were filled by the preceding full-pel search; confirm.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector and dmin its score; n
 * selects the 8x8 block inside the macroblock (bit 0: column, bit 1: row).
 * On return the vector is in half-pel units and the best score is returned.
 * A vector lying on one of the limits is only rescaled to half-pel units.
 * If s->skip_me is set, (0,0) is reported and dmin is returned unchanged.
 *
 * pix_abs_x2 / pix_abs_y2 / pix_abs_xy2 are used through CHECK_HALF_MV.
 */
static inline int fast_halfpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int xmin, int ymin, int xmax, int ymax,
                                  int pred_x, int pred_y, uint8_t *ref_picture,
                                  op_pixels_abs_func pix_abs_x2,
                                  op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
{
    UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    uint16_t *score_map= s->me_score_map;
    const int quant= s->qscale;
    int mx, my, xx, yy, dminh;
    UINT8 *pix, *ptr;

    /* the full-pel search decided to skip this macroblock */
    if (s->skip_me) {
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    xx  = 16 * s->mb_x + 8*(n&1);
    yy  = 16 * s->mb_y + 8*(n>>1);
    pix = s->new_picture[0] + (yy * s->linesize) + xx;

    mx  = *mx_ptr;
    my  = *my_ptr;
    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);

    dminh = dmin;

    if (mx <= xmin || mx >= xmax ||
        my <= ymin || my >= ymax) {
        /* on a limit: no refinement, only convert to half-pel units */
        mx<<=1;
        my<<=1;
    } else {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        const int index= (my<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];

        mx<<=1;
        my<<=1;

        /* base indices into mv_penalty for the centre position */
        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /*
         * ptr points one line up while positions with y == -1 are tested and
         * is moved back before any position with y >= 0 is tested.
         */
        ptr-= s->linesize;
        if(t<=b){
            CHECK_HALF_MV(y2 ,  0, -1)
            if(l<=r){
                CHECK_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_HALF_MV(xy2, +1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_HALF_MV(xy2, -1, +1)
                }
                CHECK_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_HALF_MV(xy2, -1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_HALF_MV(xy2, +1, +1)
                }
                CHECK_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_HALF_MV(xy2, -1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_HALF_MV(xy2, +1, +1)
                }
                CHECK_HALF_MV(x2 , -1,  0)
                CHECK_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_HALF_MV(xy2, +1, -1)
                    ptr+= s->linesize;
                }else{
                    ptr+= s->linesize;
                    CHECK_HALF_MV(xy2, -1, +1)
                }
                CHECK_HALF_MV(x2 , +1,  0)
                CHECK_HALF_MV(xy2, +1, +1)
            }
            CHECK_HALF_MV(y2 ,  0, +1)
        }

        mx+=dx;
        my+=dy;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}
946

    
947
/**
 * Record the 16x16 motion vector chosen for the current macroblock.
 *
 * The vector is always stored in s->p_mv_table at the entry for
 * (s->mb_x, s->mb_y); that table is (mb_width + 2) entries wide and is
 * addressed with a +1 offset in both directions.
 *
 * @param mx,my  vector to store
 * @param mv4    if non-zero, (mx,my) is also copied into the 2x2 group of
 *               s->motion_val entries starting at s->block_index[0].
 *               Callers pass 0 when those entries have already been filled
 *               with the per-block vectors of a 4MV search.
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy = (s->mb_y + 1) * (s->mb_width + 2) + s->mb_x + 1;
    int row, col;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    if (!mv4)
        return;

    /* two rows of two entries each, rows are block_wrap[0] apart */
    for (row = 0; row < 2; row++) {
        const int base = s->block_index[0] + row * s->block_wrap[0];

        for (col = 0; col < 2; col++) {
            s->motion_val[base + col][0] = mx;
            s->motion_val[base + col][1] = my;
        }
    }
}
970

    
971
/**
 * Compute the search range and the picture-absolute limits for motion
 * vectors of the current frame.
 *
 * @param range   receives 8 * 2^(f_code-1), doubled for H.263-style output
 *                that is not msmpeg4, and doubled again for h263_plus when
 *                unrestricted vectors are enabled
 * @param xmin,ymin,xmax,ymax  receive the limits, in pixels
 * @param f_code  f_code the range is derived from
 */
static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax, int f_code)
{
    int search_range = 8 * (1 << (f_code - 1));

    /* XXX: temporary kludge to avoid overflow for msmpeg4 */
    if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
        search_range *= 2;

    if (!s->unrestricted_mv) {
        /* vectors must keep the whole 16x16 block inside the picture */
        *range = search_range;
        *xmin  = 0;
        *ymin  = 0;
        *xmax  = s->mb_width*16 - 16;
        *ymax  = s->mb_height*16 - 16;
        return;
    }

    if (s->h263_plus)
        search_range *= 2;

    *range = search_range;
    *xmin  = -16;
    *ymin  = -16;

    if (s->avctx != NULL && s->avctx->codec->id == CODEC_ID_MPEG4) {
        /* XXX: not known to be correct, but the ffmpeg MPEG-4 decoder does
           not accept anything larger (the drawn edge is not large enough) */
        *xmax = s->width;
        *ymax = s->height;
    } else {
        *xmax = s->mb_width*16;
        *ymax = s->mb_height*16;
    }
}
999

    
1000
/**
 * Search one motion vector for each of the four blocks of the current
 * macroblock (4MV mode) and store the results in s->motion_val.
 *
 * For every block the candidate predictors are collected from s->motion_val
 * (previous value, left, right, bottom and, when not on the first line,
 * top / top-right / median), clipped against the limits, and handed to
 * epzs_motion_search4() followed by fast_halfpel_motion_search().
 *
 * @param xmin,ymin,xmax,ymax  limits relative to the macroblock; the same
 *                             limits are used for all four blocks, which
 *                             saves some clipping work
 * @param mx,my   16x16 vector of the macroblock, offered as candidate P_MV1
 * @param shift   left shift applied to the limits before they are compared
 *                with stored vectors
 * @return sum of the scores returned by the half-pel refinement of the
 *         four blocks
 */
static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
{
    /* offset, relative to the entry above a block, of its top-right predictor */
    static const int off[4]= {2, 1, 1, -1};
    uint8_t *ref_picture= s->last_picture[0];
    const int lo_x= xmin<<shift;
    const int hi_x= xmax<<shift;
    const int lo_y= ymin<<shift;
    const int hi_y= ymax<<shift;
    int P[10][2];
    int score_sum= 0;
    int block;

    for(block=0; block<4; block++){
        const int stride= s->block_wrap[0];
        const int xy    = s->block_index[block];
        int best_x, best_y;
        int pred_x4, pred_y4;
        int score;

        P_LAST[0]       = s->motion_val[xy         ][0];
        P_LAST[1]       = s->motion_val[xy         ][1];
        P_LEFT[0]       = s->motion_val[xy - 1     ][0];
        P_LEFT[1]       = s->motion_val[xy - 1     ][1];
        P_LAST_RIGHT[0] = s->motion_val[xy + 1     ][0];
        P_LAST_RIGHT[1] = s->motion_val[xy + 1     ][1];
        P_LAST_BOTTOM[0]= s->motion_val[xy + stride][0];
        P_LAST_BOTTOM[1]= s->motion_val[xy + stride][1];

        if(P_LEFT[0]        > hi_x) P_LEFT[0]       = hi_x;
        if(P_LAST_RIGHT[0]  < lo_x) P_LAST_RIGHT[0] = lo_x;
        if(P_LAST_BOTTOM[1] < lo_y) P_LAST_BOTTOM[1]= lo_y;

        /* the left neighbour is the predictor unless the median applies */
        pred_x4= P_LEFT[0];
        pred_y4= P_LEFT[1];

        /* the upper two blocks of a first-line macroblock have no top row */
        if(block>=2 || (s->mb_y != 0 && !s->first_slice_line)){
            P_TOP[0]      = s->motion_val[xy - stride             ][0];
            P_TOP[1]      = s->motion_val[xy - stride             ][1];
            P_TOPRIGHT[0] = s->motion_val[xy - stride + off[block]][0];
            P_TOPRIGHT[1] = s->motion_val[xy - stride + off[block]][1];

            if(P_TOP[1]      > hi_y) P_TOP[1]     = hi_y;
            if(P_TOPRIGHT[0] < lo_x) P_TOPRIGHT[0]= lo_x;
            if(P_TOPRIGHT[0] > hi_x) P_TOPRIGHT[0]= hi_x;
            if(P_TOPRIGHT[1] > hi_y) P_TOPRIGHT[1]= hi_y;

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            /* anything that is not H.263-style (mpeg1 at least) keeps P_LEFT */
            if(s->out_format == FMT_H263){
                pred_x4= P_MEDIAN[0];
                pred_y4= P_MEDIAN[1];
            }
        }

        P_MV1[0]= mx;
        P_MV1[1]= my;

        score= epzs_motion_search4(s, block, &best_x, &best_y, P, pred_x4, pred_y4,
                                   xmin, ymin, xmax, ymax, ref_picture);

        score= fast_halfpel_motion_search(s, &best_x, &best_y, score, xmin, ymin, xmax, ymax,
                                          pred_x4, pred_y4, ref_picture, pix_abs8x8_x2,
                                          pix_abs8x8_y2, pix_abs8x8_xy2, block);

        s->motion_val[ s->block_index[block] ][0]= best_x;
        s->motion_val[ s->block_index[block] ][1]= best_y;
        score_sum+= score;
    }

    return score_sum;
}
1080

    
1081
/**
 * Estimate the motion of macroblock (mb_x, mb_y) of a P frame and decide
 * between intra and inter coding.
 *
 * Steps, in order:
 *  - a full-pel search selected by s->me_method, leaving (mx,my) relative
 *    to the macroblock position,
 *  - the variance of the source block (varc) and the mean squared error
 *    against the motion compensated block (vard) are stored in
 *    s->mb_var / s->mc_mb_var and added to the running sums,
 *  - with CODEC_FLAG_HQ every plausible type is flagged in the type mask,
 *    otherwise a single type is picked from varc / vard,
 *  - for inter candidates the vector is refined to half-pel and, when
 *    CODEC_FLAG_4MV is set and the block is not flat, a 4MV search is run.
 *
 * The resulting type mask is written to s->mb_type, the vector goes to the
 * tables via set_p_mv_tables().
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    const int shift= 1+s->quarter_sample;
    const int mb_xy= s->mb_width * mb_y + mb_x;
    uint8_t *ref_picture= s->last_picture[0];
    UINT8 *cur, *ref;
    int P[10][2];
    int range, xmin, ymin, xmax, ymax;
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
    int mx, my, dmin;
    int pred_x=0, pred_y=0;
    int sum, varc, vard;
    int mb_type=0;

    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
    rel_xmin= xmin - mb_x*16;
    rel_xmax= xmax - mb_x*16;
    rel_ymin= ymin - mb_y*16;
    rel_ymax= ymax - mb_y*16;
    s->skip_me=0;

    switch(s->me_method) {
    case ME_X1:
    case ME_EPZS:
        {
            const int stride= s->block_wrap[0];
            const int xy    = s->block_index[0];
            const int lo_x= rel_xmin<<shift;
            const int hi_x= rel_xmax<<shift;
            const int lo_y= rel_ymin<<shift;
            const int hi_y= rel_ymax<<shift;

            /* a macroblock covers 2x2 motion_val entries, hence the +2 steps */
            P_LAST[0]       = s->motion_val[xy           ][0];
            P_LAST[1]       = s->motion_val[xy           ][1];
            P_LEFT[0]       = s->motion_val[xy - 1       ][0];
            P_LEFT[1]       = s->motion_val[xy - 1       ][1];
            P_LAST_RIGHT[0] = s->motion_val[xy + 2       ][0];
            P_LAST_RIGHT[1] = s->motion_val[xy + 2       ][1];
            P_LAST_BOTTOM[0]= s->motion_val[xy + 2*stride][0];
            P_LAST_BOTTOM[1]= s->motion_val[xy + 2*stride][1];

            if(P_LEFT[0]        > hi_x) P_LEFT[0]       = hi_x;
            if(P_LAST_RIGHT[0]  < lo_x) P_LAST_RIGHT[0] = lo_x;
            if(P_LAST_BOTTOM[1] < lo_y) P_LAST_BOTTOM[1]= lo_y;

            /* the left neighbour is the predictor unless the median applies */
            pred_x= P_LEFT[0];
            pred_y= P_LEFT[1];

            /* no row above on the first line of the picture / slice */
            if(mb_y != 0 && !s->first_slice_line){
                P_TOP[0]      = s->motion_val[xy - stride    ][0];
                P_TOP[1]      = s->motion_val[xy - stride    ][1];
                P_TOPRIGHT[0] = s->motion_val[xy - stride + 2][0];
                P_TOPRIGHT[1] = s->motion_val[xy - stride + 2][1];

                if(P_TOP[1]      > hi_y) P_TOP[1]     = hi_y;
                if(P_TOPRIGHT[0] < lo_x) P_TOPRIGHT[0]= lo_x;
                if(P_TOPRIGHT[1] > hi_y) P_TOPRIGHT[1]= hi_y;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                /* anything that is not H.263-style (mpeg1 at least) keeps P_LEFT */
                if(s->out_format == FMT_H263){
                    pred_x= P_MEDIAN[0];
                    pred_y= P_MEDIAN[1];
                }
            }

            dmin= epzs_motion_search(s, &mx, &my, P, pred_x, pred_y,
                                     rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
        }
        break;
    case ME_FULL:
        dmin= full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_LOG:
        dmin= log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_PHODS:
        dmin= phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin= 0;
        break;
    }

    /* intra / predictive decision */
    cur= s->new_picture[0] + (mb_y*16 * s->linesize) + mb_x*16;
    /* at this point (mx,my) is full-pel and relative to the macroblock */
    ref= ref_picture + ((mb_y*16 + my) * s->linesize) + (mb_x*16 + mx);

    sum = pix_sum(cur, s->linesize);
    sum = (sum+8)>>4;
    varc= (pix_norm1(cur, s->linesize) - sum*sum + 500 + 128)>>8;
    vard= (pix_norm(cur, ref, s->linesize)+128)>>8;

    s->mb_var   [mb_xy]= varc;
    s->mc_mb_var[mb_xy]= vard;
    s->mb_var_sum    += varc;
    s->mc_mb_var_sum += vard;

    if(s->flags&CODEC_FLAG_HQ){
        int use_4mv;

        /* keep every type that is not clearly worse than the other */
        if(vard*2 + 200 > varc)
            mb_type|= MB_TYPE_INTRA;

        if(varc*2 + 200 > vard){
            mb_type|= MB_TYPE_INTER;
            if(s->me_method >= ME_EPZS)
                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
                                           pix_abs16x16_xy2, 0);
            else
                halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                      pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
                                      pix_abs16x16_xy2, 0);
        }else{
            /* no refinement: just convert the full-pel vector to half-pel units */
            mx<<= 1;
            my<<= 1;
        }

        use_4mv= (s->flags&CODEC_FLAG_4MV) && !s->skip_me && varc>50 && vard>10;
        if(use_4mv){
            mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
            mb_type|= MB_TYPE_INTER4V;
        }
        /* after a 4MV search motion_val already holds the per-block vectors */
        set_p_mv_tables(s, mx, my, !use_4mv);
    }else if(vard <= 64 || vard < varc){
        mb_type|= MB_TYPE_INTER;

        if(s->me_method == ME_ZERO){
            mx<<= 1;
            my<<= 1;
        }else{
            if(s->me_method >= ME_EPZS)
                dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                                 pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
                                                 pix_abs16x16_xy2, 0);
            else
                dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                            pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
                                            pix_abs16x16_xy2, 0);

            if((s->flags&CODEC_FLAG_4MV) && !s->skip_me && varc>50 && vard>10){
                const int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);

                /* 4MV has to beat the single vector by a margin to be chosen */
                if(dmin4 + 128 < dmin)
                    mb_type= MB_TYPE_INTER4V;
            }
            set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
        }
    }else{
        mb_type|= MB_TYPE_INTRA;
        mx= 0;
        my= 0;
    }

    s->mb_type[mb_xy]= mb_type;
}
1262

    
1263
/**
 * Estimate one motion vector (forward or backward) for macroblock
 * (mb_x, mb_y) of a B frame.
 *
 * @param mv_table     table the predictors are read from and the result is
 *                     written to; it is (mb_width + 2) entries wide and is
 *                     addressed with a +1 offset in both directions
 * @param ref_picture  picture to search in
 * @param f_code       f_code used to derive the search limits
 * @return score of the selected vector after half-pel refinement
 */
int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code)
{
    const int shift = 1+s->quarter_sample;
    const int stride= s->mb_width + 2;
    const int xy    = (mb_y + 1)*stride + mb_x + 1;
    int P[10][2];
    int range, xmin, ymin, xmax, ymax;
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
    int mx, my, dmin;
    int pred_x=0, pred_y=0;

    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
    rel_xmin= xmin - mb_x*16;
    rel_xmax= xmax - mb_x*16;
    rel_ymin= ymin - mb_y*16;
    rel_ymax= ymax - mb_y*16;

    switch(s->me_method) {
    case ME_X1:
    case ME_EPZS:
        {
            const int lo_x= rel_xmin<<shift;
            const int hi_x= rel_xmax<<shift;
            const int lo_y= rel_ymin<<shift;
            const int hi_y= rel_ymax<<shift;

            P_LAST[0]       = mv_table[xy         ][0];
            P_LAST[1]       = mv_table[xy         ][1];
            P_LEFT[0]       = mv_table[xy - 1     ][0];
            P_LEFT[1]       = mv_table[xy - 1     ][1];
            P_LAST_RIGHT[0] = mv_table[xy + 1     ][0];
            P_LAST_RIGHT[1] = mv_table[xy + 1     ][1];
            P_LAST_BOTTOM[0]= mv_table[xy + stride][0];
            P_LAST_BOTTOM[1]= mv_table[xy + stride][1];

            if(P_LEFT[0]        > hi_x) P_LEFT[0]       = hi_x;
            if(P_LAST_RIGHT[0]  < lo_x) P_LAST_RIGHT[0] = lo_x;
            if(P_LAST_BOTTOM[1] < lo_y) P_LAST_BOTTOM[1]= lo_y;

            /* no row above on the first line of the picture / slice */
            if(mb_y != 0 && !s->first_slice_line){
                P_TOP[0]     = mv_table[xy - stride    ][0];
                P_TOP[1]     = mv_table[xy - stride    ][1];
                P_TOPRIGHT[0]= mv_table[xy - stride + 1][0];
                P_TOPRIGHT[1]= mv_table[xy - stride + 1][1];

                if(P_TOP[1]      > hi_y) P_TOP[1]     = hi_y;
                if(P_TOPRIGHT[0] < lo_x) P_TOPRIGHT[0]= lo_x;
                if(P_TOPRIGHT[1] > hi_y) P_TOPRIGHT[1]= hi_y;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }

            /* the left neighbour is always the predictor here */
            pred_x= P_LEFT[0];
            pred_y= P_LEFT[1];

            dmin= epzs_motion_search(s, &mx, &my, P, pred_x, pred_y,
                                     rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture);
        }
        break;
    case ME_FULL:
        dmin= full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_LOG:
        dmin= log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_PHODS:
        dmin= phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin= 0;
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    }

    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                     pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2,
                                     pix_abs16x16_xy2, 0);

    mv_table[xy][0]= mx;
    mv_table[xy][1]= my;

    return dmin;
}
1352

    
1353

    
1354
/**
 * Score a pair of forward / backward vectors for bidirectional prediction.
 *
 * The forward prediction is fetched from s->last_picture[0] into
 * s->me_scratchpad, the backward prediction from s->next_picture[0] is
 * averaged onto it, and the result is compared with the source macroblock.
 *
 * @param motion_fx,motion_fy  forward vector; the low bit selects the
 *                             interpolation function, the rest is the
 *                             integer displacement
 * @param motion_bx,motion_by  backward vector, same format
 * @param pred_fx,pred_fy      predictor the forward vector is coded against
 * @param pred_bx,pred_by      predictor the backward vector is coded against
 * @return pix_abs16x16() of the averaged prediction plus the mv_penalty
 *         of both vector differences scaled by s->qscale
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int mb_x, int mb_y,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by)
{
    //FIXME optimize?
    //FIXME direct mode penalty
    UINT16 *penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    uint8_t *pred  = s->me_scratchpad;
    uint8_t *src;
    int interp;
    int x, y;
    int score;

    /* forward prediction: written to the scratchpad */
    score = (penalty[motion_fx-pred_fx] + penalty[motion_fy-pred_fy])*s->qscale;

    interp= (motion_fx & 1) | ((motion_fy & 1) << 1);
    x     = mb_x * 16 + (motion_fx >> 1);
    y     = mb_y * 16 + (motion_fy >> 1);
    src   = s->last_picture[0] + (y * s->linesize) + x;

    put_pixels_tab[interp](pred    , src    , s->linesize, 16);
    put_pixels_tab[interp](pred + 8, src + 8, s->linesize, 16);

    /* backward prediction: averaged with what is in the scratchpad */
    score+= (penalty[motion_bx-pred_bx] + penalty[motion_by-pred_by])*s->qscale;

    interp= (motion_bx & 1) | ((motion_by & 1) << 1);
    x     = mb_x * 16 + (motion_bx >> 1);
    y     = mb_y * 16 + (motion_by >> 1);
    src   = s->next_picture[0] + (y * s->linesize) + x;

    avg_pixels_tab[interp](pred    , src    , s->linesize, 16);
    avg_pixels_tab[interp](pred + 8, src + 8, s->linesize, 16);

    score+= pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, pred, s->linesize);

    return score;
}
1393

    
1394
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1395
static inline int bidir_refine(MpegEncContext * s,
1396
                                  int mb_x, int mb_y)
1397
{
1398
    const int mot_stride = s->mb_width + 2;
1399
    const int xy = (mb_y + 1)*mot_stride + mb_x + 1;
1400
    int fbmin;
1401
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
1402
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
1403
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
1404
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
1405
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
1406
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
1407
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
1408
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1409

    
1410
    //FIXME do refinement and add flag
1411
    
1412
    fbmin= check_bidir_mv(s, mb_x, mb_y, 
1413
                          motion_fx, motion_fy,
1414
                          motion_bx, motion_by,
1415
                          pred_fx, pred_fy,
1416
                          pred_bx, pred_by);
1417

    
1418
   return fbmin;
1419
}
1420

    
1421
/**
 * Search the delta vector for direct mode of macroblock (mb_x, mb_y) of a
 * B frame.
 *
 * The forward / backward base vectors are derived from the co-located
 * vector in s->p_mv_table, scaled by the time distances s->pp_time and
 * s->bp_time. The bidirectional prediction for the 3x3 macroblock
 * neighbourhood is rendered into s->me_scratchpad, a full-pel delta is
 * searched in that buffer with epzs_motion_search(), and the eight
 * surrounding half-pel deltas are then rated with check_bidir_mv().
 *
 * The delta and the resulting forward / backward vectors are stored in
 * s->b_direct_mv_table, s->b_direct_forw_mv_table and
 * s->b_direct_back_mv_table.
 *
 * @return score of the selected delta vector
 */
static inline int direct_search(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    int P[10][2];
    const int mot_stride = s->mb_width + 2;
    const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
    int dmin, dmin2;
    int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0;
    const int motion_px= s->p_mv_table[mot_xy][0];
    const int motion_py= s->p_mv_table[mot_xy][1];
    const int time_pp= s->pp_time;
    const int time_bp= s->bp_time;
    const int time_pb= time_pp - time_bp;
    int bx, by;
    int mx, my, mx2, my2;
    /* Pointer handed to the search in place of a picture, so that the
       search reads the prediction rendered into the scratchpad.
       NOTE(review): the sign of the "+ 1" offsets could not be checked
       against epzs_motion_search() from here; confirm that the current
       macroblock maps onto the centre block of the scratchpad. */
    uint8_t *ref_picture= s->me_scratchpad - (mb_x + 1 + (mb_y + 1)*s->linesize)*16;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    /* thanks to iso-mpeg the rounding is different for the zero vector, so we need to handle that ... */
    motion_fx= (motion_px*time_pb)/time_pp;
    motion_fy= (motion_py*time_pb)/time_pp;
    motion_bx0= (-motion_px*time_bp)/time_pp;
    motion_by0= (-motion_py*time_bp)/time_pp;
    dmin2= check_bidir_mv(s, mb_x, mb_y,
                          motion_fx, motion_fy,
                          motion_bx0, motion_by0,
                          motion_fx, motion_fy,
                          motion_bx0, motion_by0) - s->qscale;

    /* backward base vector used together with a non-zero delta */
    motion_bx= motion_fx - motion_px;
    motion_by= motion_fy - motion_py;

    /* render the bidirectional prediction of the 3x3 macroblock
       neighbourhood into the scratchpad */
    for(by=-1; by<2; by++){
        for(bx=-1; bx<2; bx++){
            uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16;
            uint8_t *ptr;
            int dxy;
            int src_x, src_y;
            const int width= s->width;
            const int height= s->height;

            /* forward prediction from the previous picture */
            dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
            src_x = (mb_x + bx) * 16 + (motion_fx >> 1);
            src_y = (mb_y + by) * 16 + (motion_fy >> 1);
            src_x = clip(src_x, -16, width);
            if (src_x == width) dxy &= ~1;
            src_y = clip(src_y, -16, height);
            if (src_y == height) dxy &= ~2;

            ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
            put_pixels_tab[dxy](dest_y    , ptr    , s->linesize, 16);
            put_pixels_tab[dxy](dest_y + 8, ptr + 8, s->linesize, 16);

            /* backward prediction from the next picture, averaged on top */
            dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
            src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
            src_y = (mb_y + by) * 16 + (motion_by >> 1);
            src_x = clip(src_x, -16, width);
            if (src_x == width) dxy &= ~1;
            src_y = clip(src_y, -16, height);
            if (src_y == height) dxy &= ~2;

            /* the source pointer has to be reloaded for the backward
               reference, otherwise the forward block would be averaged
               with itself and src_x / src_y above would be unused */
            ptr = s->next_picture[0] + (src_y * s->linesize) + src_x;
            avg_pixels_tab[dxy](dest_y    , ptr    , s->linesize, 16);
            avg_pixels_tab[dxy](dest_y + 8, ptr + 8, s->linesize, 16);
        }
    }

    /* candidate predictors from the direct delta table; they are not
       clipped here, unlike in the other search functions */
    P_LAST[0]        = mv_table[mot_xy    ][0];
    P_LAST[1]        = mv_table[mot_xy    ][1];
    P_LEFT[0]        = mv_table[mot_xy - 1][0];
    P_LEFT[1]        = mv_table[mot_xy - 1][1];
    P_LAST_RIGHT[0]  = mv_table[mot_xy + 1][0];
    P_LAST_RIGHT[1]  = mv_table[mot_xy + 1][1];
    P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0];
    P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1];

    /* no row above on the first line of the picture / slice */
    if (mb_y != 0 && !s->first_slice_line) {
        P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
        P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
        P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
        P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture);
    if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff
    if(dmin2<dmin){
        /* the zero delta, rated separately above, is better */
        dmin= dmin2;
        mx=0;
        my=0;
    }

    /* convert to half-pel units and try the eight half-pel neighbours */
    mx2= mx= mx*2;
    my2= my= my*2;
    for(by=-1; by<2; by++){
        if(my2+by < -32) continue;
        for(bx=-1; bx<2; bx++){
            if(bx==0 && by==0) continue;
            if(mx2+bx < -32) continue;
            dmin2= check_bidir_mv(s, mb_x, mb_y,
                          mx2+bx+motion_fx, my2+by+motion_fy,
                          mx2+bx+motion_bx, my2+by+motion_by,
                          mx2+bx+motion_fx, my2+by+motion_fy,
                          motion_bx, motion_by) - s->qscale;

            if(dmin2<dmin){
                dmin=dmin2;
                mx= mx2 + bx;
                my= my2 + by;
            }
        }
    }

    /* the zero delta uses the separately rounded backward vector */
    if(mx==0 && my==0){
        motion_bx= motion_bx0;
        motion_by= motion_by0;
    }

    s->b_direct_mv_table[mot_xy][0]= mx;
    s->b_direct_mv_table[mot_xy][1]= my;
    s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx;
    s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my;
    s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx;
    s->b_direct_back_mv_table[mot_xy][1]= motion_by + my;
    return dmin;
}
1557

    
1558
/**
 * Estimate the motion of macroblock (mb_x, mb_y) of a B frame.
 *
 * Direct, forward, backward and bidirectional prediction are all rated.
 * With CODEC_FLAG_HQ all four types are flagged in s->mb_type; otherwise
 * only the type with the lowest score is flagged, and that score is stored
 * in s->mc_mb_var and added to s->mc_mb_var_sum.
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    const int quant= s->qscale;
    const int mb_xy= mb_y*s->mb_width + mb_x;
    int score_direct, score_forw, score_back, score_bidir;
    int type;

    /* bidir_refine() reads the vectors stored by the two searches before it */
    score_direct= direct_search(s, mb_x, mb_y);
    score_forw  = ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture[0], s->f_code);
    score_back  = ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture[0], s->b_code) - quant;
    score_bidir = bidir_refine(s, mb_x, mb_y);

    if(s->flags&CODEC_FLAG_HQ){
        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT;
    }else{
        /* a later candidate wins only if it is strictly better */
        int best= score_direct;

        type= MB_TYPE_DIRECT;
        if(score_forw < best){
            best= score_forw;
            type= MB_TYPE_FORWARD;
        }
        if(score_back < best){
            best= score_back;
            type= MB_TYPE_BACKWARD;
        }
        if(score_bidir < best){
            best= score_bidir;
            type= MB_TYPE_BIDIR;
        }

        s->mc_mb_var_sum   += best;
        s->mc_mb_var[mb_xy] = best;
    }

    s->mb_type[mb_xy]= type;
}
1609

    
1610
/* find best f_code for ME which do unlimited searches */
1611
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
1612
{
1613
    if(s->me_method>=ME_EPZS){
1614
        int score[8];
1615
        int i, y;
1616
        UINT8 * fcode_tab= s->fcode_tab;
1617
        int best_fcode=-1;
1618
        int best_score=-10000000;
1619

    
1620
        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer
1621

    
1622
        for(y=0; y<s->mb_height; y++){
1623
            int x;
1624
            int xy= (y+1)* (s->mb_width+2) + 1;
1625
            i= y*s->mb_width;
1626
            for(x=0; x<s->mb_width; x++){
1627
                if(s->mb_type[i] & type){
1628
                    int fcode= MAX(fcode_tab[mv_table[xy][0] + MAX_MV],
1629
                                   fcode_tab[mv_table[xy][1] + MAX_MV]);
1630
                    int j;
1631
                    
1632
                    for(j=0; j<fcode && j<8; j++){
1633
                        if(s->pict_type==B_TYPE || s->mc_mb_var[i] < s->mb_var[i])
1634
                            score[j]-= 170;
1635
                    }
1636
                }
1637
                i++;
1638
                xy++;
1639
            }
1640
        }
1641
        
1642
        for(i=1; i<8; i++){
1643
            if(score[i] > best_score){
1644
                best_score= score[i];
1645
                best_fcode= i;
1646
            }
1647
//            printf("%d %d\n", i, score[i]);
1648
        }
1649

    
1650
//    printf("fcode: %d type: %d\n", i, s->pict_type);
1651
        return best_fcode;
1652
/*        for(i=0; i<=MAX_FCODE; i++){
1653
            printf("%d ", mv_num[i]);
1654
        }
1655
        printf("\n");*/
1656
    }else{
1657
        return 1;
1658
    }
1659
}
1660

    
1661
/**
 * Convert P frame macroblocks whose vectors do not fit s->f_code to intra.
 *
 * A vector component does not fit if its s->fcode_tab entry is larger than
 * f_code or is 0. In the first pass INTER macroblocks with such a vector in
 * s->p_mv_table are changed to INTRA and their vector is zeroed. With
 * CODEC_FLAG_4MV a second pass does the same check on the four vectors of
 * each INTER4V macroblock in s->motion_val.
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    const int f_code= s->f_code;
    int y;
    UINT8 * fcode_tab= s->fcode_tab;

    /* clip / convert to intra 16x16 type MVs */
    for(y=0; y<s->mb_height; y++){
        int x;
        int xy= (y+1)* (s->mb_width+2)+1;
        int i= y*s->mb_width;
        for(x=0; x<s->mb_width; x++){
            if(s->mb_type[i]&MB_TYPE_INTER){
                if(   fcode_tab[s->p_mv_table[xy][0] + MAX_MV] > f_code
                   || fcode_tab[s->p_mv_table[xy][0] + MAX_MV] == 0
                   || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] > f_code
                   || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] == 0 ){
                    s->mb_type[i] &= ~MB_TYPE_INTER;
                    s->mb_type[i] |= MB_TYPE_INTRA;
                    s->p_mv_table[xy][0] = 0;
                    s->p_mv_table[xy][1] = 0;
                }
            }
            xy++;
            i++;
        }
    }

    if(s->flags&CODEC_FLAG_4MV){
        /* motion_val holds 2x2 entries per macroblock plus a border */
        const int wrap= 2+ s->mb_width*2;

        /* clip / convert to intra 8x8 type MVs */
        for(y=0; y<s->mb_height; y++){
            int xy= (y*2 + 1)*wrap + 1;
            int i= y*s->mb_width;
            int x;

            /* xy and i must advance for every macroblock, not only for the
               INTER4V ones, otherwise the scan stalls on the first
               macroblock of another type and the rest of the row is never
               checked */
            for(x=0; x<s->mb_width; x++, xy+=2, i++){
                if(s->mb_type[i]&MB_TYPE_INTER4V){
                    int block;
                    for(block=0; block<4; block++){
                        int off= (block& 1) + (block>>1)*wrap;
                        int mx= s->motion_val[ xy + off ][0];
                        int my= s->motion_val[ xy + off ][1];

                        if(   fcode_tab[mx + MAX_MV] > f_code
                           || fcode_tab[mx + MAX_MV] == 0
                           || fcode_tab[my + MAX_MV] > f_code
                           || fcode_tab[my + MAX_MV] == 0 ){
                            s->mb_type[i] &= ~MB_TYPE_INTER4V;
                            s->mb_type[i] |= MB_TYPE_INTRA;
                        }
                    }
                }
            }
        }
    }
}
1725

    
1726
/*
 * Handle B-frame macroblocks whose vector in mv_table does not fit f_code.
 *
 * A vector component v is treated as unusable when
 * s->fcode_tab[v + MAX_MV] is greater than f_code or equal to 0.
 *
 * s:        encoder context; mb_type is modified in place.
 * mv_table: one vector per macroblock, rows (mb_width+2) entries wide,
 *           indexing starts at row 1, column 1; modified in place.
 * f_code:   the f_code the vectors are checked against.
 * type:     mb_type bit mask selecting the macroblocks that use mv_table.
 *
 * For each selected macroblock with an unusable vector: if the macroblock
 * has any mb_type bit outside 'type' set, the 'type' bits are cleared;
 * otherwise the vector is reset to (0,0).
 */
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
{
    UINT8 * const range_tab= s->fcode_tab;
    int mb_y;

    /* clip / convert to intra 16x16 type MVs */
    for(mb_y=0; mb_y<s->mb_height; mb_y++){
        int mv_idx= (mb_y+1)*(s->mb_width+2) + 1;
        int mb_idx= mb_y*s->mb_width;
        int mb_x;

        for(mb_x=0; mb_x<s->mb_width; mb_x++, mv_idx++, mb_idx++){
            int16_t *mv;
            int code;
            int too_long;

            /* macroblock does not use this vector table */
            if(!(s->mb_type[mb_idx]&type))
                continue;

            mv= mv_table[mv_idx];

            /* test x first and look at y only when x is fine */
            code= range_tab[mv[0] + MAX_MV];
            too_long= code > f_code || code == 0;
            if(!too_long){
                code= range_tab[mv[1] + MAX_MV];
                too_long= code > f_code || code == 0;
            }
            if(!too_long)
                continue;

            if(s->mb_type[mb_idx]&(~type)){
                /* another candidate mode remains, just drop this one */
                s->mb_type[mb_idx] &= ~type;
            }else{
                /* no alternative left: fall back to a zero vector */
                mv[0]= 0;
                mv[1]= 0;
                //this is certainly bad FIXME
            }
        }
    }
}