Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est.c @ b07a5980

History | View | Annotate | Download (48.1 KB)

1
/*
2
 * Motion estimation 
3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002 Michael Niedermayer
5
 * 
6
 *
7
 * This library is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2 of the License, or (at your option) any later version.
11
 *
12
 * This library is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with this library; if not, write to the Free Software
19
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20
 *
21
 * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
22
 */
23
#include <stdlib.h>
24
#include <stdio.h>
25
#include "avcodec.h"
26
#include "dsputil.h"
27
#include "mpegvideo.h"
28

    
29
//#undef NDEBUG
30
//#include <assert.h>
31

    
32
#define SQ(a) ((a)*(a))
33

    
34
#define P_LEFT P[1]
35
#define P_TOP P[2]
36
#define P_TOPRIGHT P[3]
37
#define P_MEDIAN P[4]
38
#define P_MV1 P[9]
39

    
40
static inline int sad_hpel_motion_search(MpegEncContext * s,
41
                                  int *mx_ptr, int *my_ptr, int dmin,
42
                                  int xmin, int ymin, int xmax, int ymax,
43
                                  int pred_x, int pred_y, Picture *picture,
44
                                  int n, int size, uint16_t * const mv_penalty);
45

    
46
/**
 * Advance the generation counter of the motion-estimation map.
 *
 * The counter is stepped by 1<<(ME_MAP_MV_BITS*2). If the step makes it 0,
 * the whole map is zeroed and the counter restarts at the first step value.
 *
 * @param s encoder context; s->me.map_generation and s->me.map are updated
 * @return the new value of s->me.map_generation
 */
static inline int update_map_generation(MpegEncContext * s)
{
    s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2);
    if(s->me.map_generation==0){
        /* counter came back to 0: restart it and clear every map entry */
        s->me.map_generation= 1<<(ME_MAP_MV_BITS*2);
        memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
    }
    return s->me.map_generation;
}
55

    
56
/* shape adaptive search stuff */
57
/** One candidate minimum used by the shape adaptive search. */
typedef struct Minima{
    int height;   /* value the candidates are ordered by (see minima_cmp) */
    int x, y;     /* position of the candidate */
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima entries by ascending height.
 *
 * The result is computed with comparisons rather than a subtraction so that
 * it cannot overflow for any pair of heights.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return negative, zero or positive if a's height is lower than, equal to
 *         or greater than b's height
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = a;
    const Minima *db = b;

    return (da->height > db->height) - (da->height < db->height);
}
69
                                  
70
/* SIMPLE */
71
/*
 * Template instantiation "simple_": luma-only comparison.
 * motion_est_template.c is included with the macros below in effect, so
 * every RENAME(x) used by the template expands to simple_x.
 * The macro bodies use names that are not defined in this part of the file
 * (s, src_y, ref_y, stride, cmp, cmp_sub, hpel_put, qpel_put); presumably
 * they are provided by the template code -- confirm there.
 */
#define RENAME(a) simple_ ## a

/* full-pel: d = cmp() of src_y against ref_y displaced by (x, y) */
#define CMP(d, x, y, size)\
d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);

/* half-pel: build the (dx, dy) interpolated block in the scratchpad, then
   d = cmp_sub() of the scratchpad against src_y */
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 2*(dy);\
    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
}

/* quarter-pel: same as CMP_HPEL but using qpel_put with a 4x4 sub-position index */
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 4*(dy);\
    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
}

#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
96

    
97
/* SIMPLE CHROMA */
98
/*
 * Template instantiation "simple_chroma_": luma comparison plus, when the
 * chroma compare function pointer is non-NULL, the two chroma planes.
 * Every RENAME(x) used by motion_est_template.c expands to simple_chroma_x.
 * Besides the names used by the luma-only variant, the macros reference
 * chroma_cmp, chroma_cmp_sub, chroma_hpel_put, ref_u, ref_v, src_u, src_v
 * and uvstride, none of which is defined in this part of the file;
 * presumably the template provides them -- confirm there.
 */
#define RENAME(a) simple_chroma_ ## a

/* full-pel: luma score, then add the U and V scores. The chroma position is
   (x>>1, y>>1) with the low bits of x and y selecting the half-pel filter. */
#define CMP(d, x, y, size)\
d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
if(chroma_cmp){\
    int dxy= ((x)&1) + 2*((y)&1);\
    int c= ((x)>>1) + ((y)>>1)*uvstride;\
\
    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
    d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
    chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
    d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
}

/* half-pel: luma via hpel_put/cmp_sub; the chroma filter index ORs the luma
   sub-position with the low bits of the full-pel position */
#define CMP_HPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 2*(dy);\
    hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
    if(chroma_cmp_sub){\
        int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
        int c= ((x)>>1) + ((y)>>1)*uvstride;\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
    }\
}

/* quarter-pel: luma via qpel_put/cmp_sub; the chroma vector is derived from
   the quarter-pel luma vector (halved, then reduced to half-pel precision
   with the dropped bit ORed into the fractional bit) */
#define CMP_QPEL(d, dx, dy, x, y, size)\
{\
    const int dxy= (dx) + 4*(dy);\
    qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
    d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
    if(chroma_cmp_sub){\
        int cxy, c;\
        int cx= (4*(x) + (dx))/2;\
        int cy= (4*(y) + (dy))/2;\
        cx= (cx>>1)|(cx&1);\
        cy= (cy>>1)|(cy&1);\
        cxy= (cx&1) + 2*(cy&1);\
        c= ((cx)>>1) + ((cy)>>1)*uvstride;\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
        chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
        d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
    }\
}

#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
153

    
154
/* SIMPLE DIRECT HPEL */
155
/*
 * Template instantiation "simple_direct_hpel_": direct-mode comparison at
 * half-pel precision. Every RENAME(x) used by motion_est_template.c expands
 * to simple_direct_hpel_x.
 * The macros reference xmin, xmax, ymin, ymax, time_pb, time_pp, ref_y,
 * ref2_y, src_y, stride, hpel_put, hpel_avg, cmp and cmp_sub, none of which
 * is defined in this part of the file; presumably the template provides
 * them -- confirm there.
 */
#define RENAME(a) simple_direct_hpel_ ## a
//FIXME precalc divisions stuff

/*
 * CMP_DIRECT: if the half-pel position 2*(x,y)+(dx,dy) lies inside the
 * limits, build a prediction in the scratchpad (hpel_put from ref_y, then
 * hpel_avg from ref2_y on the same destination) and score it with cmp_func;
 * otherwise d is set to the constant 256*256*256*32.
 * With MV_TYPE_8X8 the prediction is built from four 8x8 blocks, each with
 * its own direct_basis_mv / co_located_mv entry; otherwise entry 0 is used
 * for one 16x16 block.
 */
#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
    const int hx= 2*(x) + (dx);\
    const int hy= 2*(y) + (dy);\
    if(s->mv_type==MV_TYPE_8X8){\
        int i;\
        for(i=0; i<4; i++){\
            int fx = s->me.direct_basis_mv[i][0] + hx;\
            int fy = s->me.direct_basis_mv[i][1] + hy;\
            int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
            int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
            int fxy= (fx&1) + 2*(fy&1);\
            int bxy= (bx&1) + 2*(by&1);\
\
            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
            hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
            hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
        }\
    }else{\
        int fx = s->me.direct_basis_mv[0][0] + hx;\
        int fy = s->me.direct_basis_mv[0][1] + hy;\
        int bx = hx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
        int by = hy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
        int fxy= (fx&1) + 2*(fy&1);\
        int bxy= (bx&1) + 2*(by&1);\
\
        hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
        hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
    }\
    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
}else\
    d= 256*256*256*32;

/* sub-pel candidates are scored with cmp_sub */
#define CMP_HPEL(d, dx, dy, x, y, size)\
    CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)

/* full-pel candidates are scored with cmp and a zero sub-pel offset */
#define CMP(d, x, y, size)\
    CMP_DIRECT(d, 0, 0, x, y, size, cmp)

#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
#undef CMP_DIRECT
205

    
206
/* SIMPLE DIRECT QPEL */
207
/*
 * Template instantiation "simple_direct_qpel_": direct-mode comparison at
 * quarter-pel precision. Every RENAME(x) used by motion_est_template.c
 * expands to simple_direct_qpel_x.
 * The macros reference xmin, xmax, ymin, ymax, time_pb, time_pp, ref_y,
 * ref2_y, src_y, stride, qpel_put, qpel_avg, cmp and cmp_sub, none of which
 * is defined in this part of the file; presumably the template provides
 * them -- confirm there.
 */
#define RENAME(a) simple_direct_qpel_ ## a

/*
 * CMP_DIRECT: if the quarter-pel position 4*(x,y)+(dx,dy) lies inside the
 * limits, build a prediction in the scratchpad (qpel_put from ref_y, then
 * qpel_avg from ref2_y on the same destination) and score it with cmp_func;
 * otherwise d is set to the constant 256*256*256*32.
 * With MV_TYPE_8X8 the prediction is built from four 8x8 blocks, each with
 * its own direct_basis_mv / co_located_mv entry; otherwise entry 0 is used
 * for one 16x16 block.
 *
 * NOTE(review): the per-block offset added to bx/by when qx/qy is 0 is
 * (i&1)*16 / (i>>1)*16, the same constant as in the half-pel variant. 16
 * half-pel units are 8 pixels, but 16 quarter-pel units are only 4 pixels,
 * while dst advances by 8 pixels per block. Confirm against the code that
 * fills direct_basis_mv whether 32 is intended here.
 */
#define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
    const int qx= 4*(x) + (dx);\
    const int qy= 4*(y) + (dy);\
    if(s->mv_type==MV_TYPE_8X8){\
        int i;\
        for(i=0; i<4; i++){\
            int fx = s->me.direct_basis_mv[i][0] + qx;\
            int fy = s->me.direct_basis_mv[i][1] + qy;\
            int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
            int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
            int fxy= (fx&3) + 4*(fy&3);\
            int bxy= (bx&3) + 4*(by&3);\
\
            uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
            qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
            qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
        }\
    }else{\
        int fx = s->me.direct_basis_mv[0][0] + qx;\
        int fy = s->me.direct_basis_mv[0][1] + qy;\
        int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
        int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
        int fxy= (fx&3) + 4*(fy&3);\
        int bxy= (bx&3) + 4*(by&3);\
\
        qpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
        qpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
    }\
    d = cmp_func(s, s->me.scratchpad, src_y, stride);\
}else\
    d= 256*256*256*32;

/* sub-pel candidates are scored with cmp_sub */
#define CMP_QPEL(d, dx, dy, x, y, size)\
    CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)

/* full-pel candidates are scored with cmp and a zero sub-pel offset */
#define CMP(d, x, y, size)\
    CMP_DIRECT(d, 0, 0, x, y, size, cmp)

#include "motion_est_template.c"
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
/* was "#undef CMP__DIRECT", which left CMP_DIRECT defined for the rest of the file */
#undef CMP_DIRECT
256

    
257

    
258
/**
 * Comparison function that ignores its inputs and always scores 0.
 * Installed by set_cmp() for FF_CMP_ZERO.
 *
 * @return always 0
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){
    return 0;
}
261

    
262
/**
 * Fill a comparison-function table according to a FF_CMP_* selector.
 *
 * The first 11 entries of cmp are cleared, then entries 0 and 1 are taken
 * from the matching DSPContext table (for FF_CMP_ZERO the first 7 entries
 * are set to zero_cmp). Only the low 8 bits of type select the function;
 * higher bits are ignored here.
 *
 * @param s    encoder context providing the DSPContext (s->dsp)
 * @param cmp  table to fill; assumed to hold at least 11 entries -- TODO
 *             confirm against the DSPContext declaration
 * @param type FF_CMP_* value, possibly ORed with flags above bit 7
 */
static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
    DSPContext* c= &s->dsp;
    int i;

    /* size by the element type: the table holds function pointers, not void* */
    memset(cmp, 0, sizeof(*cmp)*11);

    switch(type&0xFF){
    case FF_CMP_SAD:
        cmp[0]= c->sad[0];
        cmp[1]= c->sad[1];
        break;
    case FF_CMP_SATD:
        cmp[0]= c->hadamard8_diff[0];
        cmp[1]= c->hadamard8_diff[1];
        break;
    case FF_CMP_SSE:
        cmp[0]= c->sse[0];
        cmp[1]= c->sse[1];
        break;
    case FF_CMP_DCT:
        cmp[0]= c->dct_sad[0];
        cmp[1]= c->dct_sad[1];
        break;
    case FF_CMP_PSNR:
        cmp[0]= c->quant_psnr[0];
        cmp[1]= c->quant_psnr[1];
        break;
    case FF_CMP_ZERO:
        for(i=0; i<7; i++){
            cmp[i]= zero_cmp;
        }
        break;
    default:
        /* unknown selector: the table stays all-NULL */
        fprintf(stderr,"internal error in cmp function selection\n");
    }
}
298

    
299
/**
 * Return the motion-vector penalty factor for a comparison type.
 *
 * Only the low 8 bits of type are examined, matching set_cmp(); flag bits
 * such as FF_CMP_CHROMA therefore do not change the result.
 *
 * @param s    encoder context (s->qscale is read)
 * @param type FF_CMP_* value, possibly ORed with flags above bit 7
 * @return s->qscale*8 for FF_CMP_SSE, FF_CMP_DCT and FF_CMP_SATD,
 *         s->qscale for FF_CMP_SAD and every other value
 */
static inline int get_penalty_factor(MpegEncContext *s, int type){
    switch(type&0xFF){
    case FF_CMP_SSE:
    case FF_CMP_DCT:
    case FF_CMP_SATD:
        return s->qscale*8;
    case FF_CMP_SAD:
    default:
        return s->qscale;
    }
}
312

    
313
/**
 * Set up the motion-estimation function pointers of an encoder context.
 *
 * Fills the me_cmp, me_sub_cmp and mb_cmp tables from the corresponding
 * AVCodecContext settings, then selects
 *  - s->me.sub_motion_search: quarter-pel variants when CODEC_FLAG_QPEL is
 *    set, half-pel variants otherwise; the chroma variant when me_sub_cmp
 *    has FF_CMP_CHROMA set; the dedicated sad_hpel_motion_search when both
 *    me_sub_cmp and me_cmp are exactly FF_CMP_SAD;
 *  - s->me.motion_search[0] and [1]: the chroma or luma-only EPZS searches
 *    depending on FF_CMP_CHROMA in me_cmp.
 *
 * @param s encoder context to initialise
 */
void ff_init_me(MpegEncContext *s){
    set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
    set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
    set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);

    if(s->flags&CODEC_FLAG_QPEL){
        if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
            s->me.sub_motion_search= simple_chroma_qpel_motion_search;
        else
            s->me.sub_motion_search= simple_qpel_motion_search;
    }else{
        if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
            s->me.sub_motion_search= simple_chroma_hpel_motion_search;
        else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD)
            s->me.sub_motion_search= sad_hpel_motion_search;
        else
            s->me.sub_motion_search= simple_hpel_motion_search;
    }

    if(s->avctx->me_cmp&FF_CMP_CHROMA){
        s->me.motion_search[0]= simple_chroma_epzs_motion_search;
        s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
    }else{
        s->me.motion_search[0]= simple_epzs_motion_search;
        s->me.motion_search[1]= simple_epzs_motion_search4;
    }
}
340
      
341
/**
 * Sum of absolute differences between a 16x16 block and a constant.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in samples between the starts of two rows
 * @param mean      value subtracted from every sample
 * @return sum over the 16x16 samples of ABS(sample - mean)
 */
static int pix_dev(UINT8 * pix, int line_size, int mean)
{
    int s, i, j;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j++)
            s += ABS(pix[j]-mean);
        pix += line_size;
    }
    return s;
}
362

    
363
/**
 * "Search" that performs no estimation: returns the position of the current
 * macroblock itself.
 *
 * @param s      encoder context (s->mb_x and s->mb_y are read)
 * @param mx_ptr receives 16 * s->mb_x
 * @param my_ptr receives 16 * s->mb_y
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    *mx_ptr = 16 * s->mb_x;
    *my_ptr = 16 * s->mb_y;
}
369

    
370
/**
 * Exhaustive full-pel search around the current macroblock.
 *
 * Every position of the window [xx-range+1, xx+range-1] x
 * [yy-range+1, yy+range-1], clipped to [xmin, xmax] x [ymin, ymax], is
 * scored with s->dsp.pix_abs16x16. Ties are resolved in favour of the
 * position with the smaller |dx|+|dy| distance to the macroblock origin.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       search radius in pixels
 * @param xmin,ymin,xmax,ymax limits for the candidate positions
 * @param ref_picture reference plane, addressed with s->linesize
 * @return score of the best position; 0x7fffffff (with position 0,0) if the
 *         clipped window is empty
 */
static int full_motion_search(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int range,
                              int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    UINT8 *pix;

    xx = 16 * s->mb_x;
    yy = 16 * s->mb_y;
    x1 = xx - range + 1;        /* we lose one pixel to avoid boundary problems with half pixel prediction */
    if (x1 < xmin)
        x1 = xmin;
    x2 = xx + range - 1;
    if (x2 > xmax)
        x2 = xmax;
    y1 = yy - range + 1;
    if (y1 < ymin)
        y1 = ymin;
    y2 = yy + range - 1;
    if (y2 > ymax)
        y2 = ymax;
    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;
    for (y = y1; y <= y2; y++) {
        for (x = x1; x <= x2; x++) {
            d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
                             s->linesize);
            if (d < dmin ||
                (d == dmin &&
                 (abs(x - xx) + abs(y - yy)) <
                 (abs(mx - xx) + abs(my - yy)))) {
                dmin = d;
                mx = x;
                my = y;
            }
        }
    }

    *mx_ptr = mx;
    *my_ptr = my;

    return dmin;
}
422

    
423

    
424
/**
 * Logarithmic full-pel search.
 *
 * Starting with a window of +-range around the macroblock origin, positions
 * spaced range apart are scored with s->dsp.pix_abs16x16. The step is then
 * halved and the window re-centred on the best position so far, until the
 * step drops below 1. Windows are clipped to [xmin, xmax] x [ymin, ymax].
 * Ties are resolved in favour of the position with the smaller |dx|+|dy|
 * distance to the macroblock origin.
 *
 * NOTE(review): range is also the loop increment, so a value below 1 on
 * entry would never advance the loops; the visible caller passes range / 2.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       initial step / radius in pixels
 * @param xmin,ymin,xmax,ymax limits for the candidate positions
 * @param ref_picture reference plane, addressed with s->linesize
 * @return score of the best position
 */
static int log_motion_search(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int range,
                             int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y;
    int mx, my, dmin, d;
    UINT8 *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    dmin = 0x7fffffff;
    mx = 0;
    my = 0;

    do {
        for (y = y1; y <= y2; y += range) {
            for (x = x1; x <= x2; x += range) {
                d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                    dmin = d;
                    mx = x;
                    my = y;
                }
            }
        }

        /* halve the step and re-centre the window on the best position */
        range = range >> 1;

        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "log       - MX: %d\tMY: %d\n", mx, my);
#endif
    *mx_ptr = mx;
    *my_ptr = my;
    return dmin;
}
499

    
500
/**
 * Full-pel search that optimises the two coordinates separately.
 *
 * In each round the x coordinate is searched along the current row y, then
 * the y coordinate is searched along the column of the x value the round
 * started with. Candidates are spaced range apart and scored with
 * s->dsp.pix_abs16x16. The step is then halved and both 1-D windows are
 * re-centred on the best (mx, my), until the step drops below 1. Windows
 * are clipped to [xmin, xmax] and [ymin, ymax].
 *
 * NOTE(review): range is also the loop increment, so a value below 1 on
 * entry would never advance the loops; the visible caller passes range / 2.
 *
 * @param s           encoder context
 * @param mx_ptr      receives the x coordinate of the best position
 * @param my_ptr      receives the y coordinate of the best position
 * @param range       initial step / radius in pixels
 * @param xmin,ymin,xmax,ymax limits for the candidate positions
 * @param ref_picture reference plane, addressed with s->linesize
 * @return best score of the last vertical pass (dminy)
 */
static int phods_motion_search(MpegEncContext * s,
                               int *mx_ptr, int *my_ptr, int range,
                               int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
{
    int x1, y1, x2, y2, xx, yy, x, y, lastx, d;
    int mx, my, dminx, dminy;
    UINT8 *pix;

    xx = s->mb_x << 4;
    yy = s->mb_y << 4;

    /* Left limit */
    x1 = xx - range;
    if (x1 < xmin)
        x1 = xmin;

    /* Right limit */
    x2 = xx + range;
    if (x2 > xmax)
        x2 = xmax;

    /* Upper limit */
    y1 = yy - range;
    if (y1 < ymin)
        y1 = ymin;

    /* Lower limit */
    y2 = yy + range;
    if (y2 > ymax)
        y2 = ymax;

    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
    mx = 0;
    my = 0;

    x = xx;
    y = yy;
    do {
        dminx = 0x7fffffff;
        dminy = 0x7fffffff;

        /* horizontal pass along row y; remember x so the vertical pass
           uses the column this round started with */
        lastx = x;
        for (x = x1; x <= x2; x += range) {
            d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminx = d;
                mx = x;
            }
        }

        /* vertical pass along column lastx */
        x = lastx;
        for (y = y1; y <= y2; y += range) {
            d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
                dminy = d;
                my = y;
            }
        }

        /* halve the step and re-centre both windows on the best position */
        range = range >> 1;

        x = mx;
        y = my;
        x1 = mx - range;
        if (x1 < xmin)
            x1 = xmin;

        x2 = mx + range;
        if (x2 > xmax)
            x2 = xmax;

        y1 = my - range;
        if (y1 < ymin)
            y1 = ymin;

        y2 = my + range;
        if (y2 > ymax)
            y2 = ymax;

    } while (range >= 1);

#ifdef DEBUG
    fprintf(stderr, "phods     - MX: %d\tMY: %d\n", mx, my);
#endif

    /* the result is full-pel; no half pixel refinement is done here */
    *mx_ptr = mx;
    *my_ptr = my;
    return dminy;
}
590

    
591

    
592
#define Z_THRESHOLD 256
593

    
594
#define CHECK_SAD_HALF_MV(suffix, x, y) \
595
{\
596
    d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
597
    d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
598
    COPY3_IF_LT(dminh, d, dx, x, dy, y)\
599
}
600

    
601
static inline int sad_hpel_motion_search(MpegEncContext * s,
602
                                  int *mx_ptr, int *my_ptr, int dmin,
603
                                  int xmin, int ymin, int xmax, int ymax,
604
                                  int pred_x, int pred_y, Picture *picture,
605
                                  int n, int size, uint16_t * const mv_penalty)
606
{
607
    uint8_t *ref_picture= picture->data[0];
608
    uint32_t *score_map= s->me.score_map;
609
    const int penalty_factor= s->me.sub_penalty_factor;
610
    int mx, my, xx, yy, dminh;
611
    UINT8 *pix, *ptr;
612
    op_pixels_abs_func pix_abs_x2;
613
    op_pixels_abs_func pix_abs_y2;
614
    op_pixels_abs_func pix_abs_xy2;
615
    
616
    if(size==0){
617
        pix_abs_x2 = s->dsp.pix_abs16x16_x2;
618
        pix_abs_y2 = s->dsp.pix_abs16x16_y2;
619
        pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
620
    }else{
621
        pix_abs_x2 = s->dsp.pix_abs8x8_x2;
622
        pix_abs_y2 = s->dsp.pix_abs8x8_y2;
623
        pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
624
    }
625

    
626
    if(s->me.skip){
627
//    printf("S");
628
        *mx_ptr = 0;
629
        *my_ptr = 0;
630
        return dmin;
631
    }
632
//    printf("N");
633
        
634
    xx = 16 * s->mb_x + 8*(n&1);
635
    yy = 16 * s->mb_y + 8*(n>>1);
636
    pix =  s->new_picture.data[0] + (yy * s->linesize) + xx;
637

    
638
    mx = *mx_ptr;
639
    my = *my_ptr;
640
    ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
641
    
642
    dminh = dmin;
643

    
644
    if (mx > xmin && mx < xmax && 
645
        my > ymin && my < ymax) {
646
        int dx=0, dy=0;
647
        int d, pen_x, pen_y; 
648
        const int index= (my<<ME_MAP_SHIFT) + mx;
649
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
650
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
651
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
652
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
653
        mx<<=1;
654
        my<<=1;
655

    
656
        
657
        pen_x= pred_x + mx;
658
        pen_y= pred_y + my;
659

    
660
        ptr-= s->linesize;
661
        if(t<=b){
662
            CHECK_SAD_HALF_MV(y2 , 0, -1)
663
            if(l<=r){
664
                CHECK_SAD_HALF_MV(xy2, -1, -1)
665
                if(t+r<=b+l){
666
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
667
                    ptr+= s->linesize;
668
                }else{
669
                    ptr+= s->linesize;
670
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
671
                }
672
                CHECK_SAD_HALF_MV(x2 , -1,  0)
673
            }else{
674
                CHECK_SAD_HALF_MV(xy2, +1, -1)
675
                if(t+l<=b+r){
676
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
677
                    ptr+= s->linesize;
678
                }else{
679
                    ptr+= s->linesize;
680
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
681
                }
682
                CHECK_SAD_HALF_MV(x2 , +1,  0)
683
            }
684
        }else{
685
            if(l<=r){
686
                if(t+l<=b+r){
687
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
688
                    ptr+= s->linesize;
689
                }else{
690
                    ptr+= s->linesize;
691
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
692
                }
693
                CHECK_SAD_HALF_MV(x2 , -1,  0)
694
                CHECK_SAD_HALF_MV(xy2, -1, +1)
695
            }else{
696
                if(t+r<=b+l){
697
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
698
                    ptr+= s->linesize;
699
                }else{
700
                    ptr+= s->linesize;
701
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
702
                }
703
                CHECK_SAD_HALF_MV(x2 , +1,  0)
704
                CHECK_SAD_HALF_MV(xy2, +1, +1)
705
            }
706
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
707
        }
708
        mx+=dx;
709
        my+=dy;
710

    
711
    }else{
712
        mx<<=1;
713
        my<<=1;
714
    }
715

    
716
    *mx_ptr = mx;
717
    *my_ptr = my;
718
    return dminh;
719
}
720

    
721
/**
 * Store the motion vector chosen for the current macroblock.
 *
 * The vector is always written to s->p_mv_table. If mv4 is non-zero it is
 * also copied to the four s->motion_val entries of the macroblock (two
 * entries on the row at s->block_index[0] and two on the row
 * s->block_wrap[0] further on).
 *
 * @param s   encoder context (s->mb_x, s->mb_y select the macroblock)
 * @param mx  vector x component
 * @param my  vector y component
 * @param mv4 non-zero to also fill motion_val; the visible caller passes 0
 *            after a 4MV search, whose per-block vectors are already stored
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    /* the table has a border of one entry on each side, hence +1 and +2 */
    const int xy= s->mb_x + 1 + (s->mb_y + 1)*(s->mb_width + 2);

    s->p_mv_table[xy][0] = mx;
    s->p_mv_table[xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(mv4){
        int mot_xy= s->block_index[0];

        s->motion_val[mot_xy  ][0]= mx;
        s->motion_val[mot_xy  ][1]= my;
        s->motion_val[mot_xy+1][0]= mx;
        s->motion_val[mot_xy+1][1]= my;

        mot_xy += s->block_wrap[0];
        s->motion_val[mot_xy  ][0]= mx;
        s->motion_val[mot_xy  ][1]= my;
        s->motion_val[mot_xy+1][0]= mx;
        s->motion_val[mot_xy+1][1]= my;
    }
}
744

    
745
/**
 * Compute the search range and the picture limits for motion vectors.
 *
 * range starts at 8 * 2^(f_code-1), is doubled for FMT_H263 output that is
 * not msmpeg4, and doubled again for h263_plus with unrestricted vectors.
 * With unrestricted vectors the limits extend 16 pixels before the picture
 * origin and up to the macroblock-aligned size (or, for CODEC_ID_MPEG4, the
 * real picture size); otherwise they keep a whole macroblock inside the
 * macroblock-aligned picture.
 *
 * @param s      encoder context
 * @param range  receives the search range
 * @param xmin,ymin,xmax,ymax receive the limits
 * @param f_code f_code the range is derived from
 */
static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax, int f_code)
{
    *range = 8 * (1 << (f_code - 1));
    /* XXX: temporary kludge to avoid overflow for msmpeg4 */
    if (s->out_format == FMT_H263 && !s->h263_msmpeg4)
        *range *= 2;

    if (s->unrestricted_mv) {
        *xmin = -16;
        *ymin = -16;
        if (s->h263_plus)
            *range *= 2;
        if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
            *xmax = s->mb_width*16;
            *ymax = s->mb_height*16;
        }else {
            /* XXX: not sure this is correct, but the ffmpeg MPEG-4 decoder
                    does not like it otherwise (because the drawn edge is
                    not large enough) */
            *xmax = s->width;
            *ymax = s->height;
        }
    } else {
        *xmin = 0;
        *ymin = 0;
        *xmax = s->mb_width*16 - 16;
        *ymax = s->mb_height*16 - 16;
    }
}
773

    
774
/**
 * Estimate one motion vector for each of the four 8x8 blocks of the
 * current macroblock.
 *
 * For every block the predictors (left, top, top-right, median) are read
 * from s->motion_val, the 16x16 vector (mx, my) is supplied as P_MV1,
 * s->me.motion_search[1] is run, the result is refined with
 * s->me.sub_motion_search, and the block vector is stored back into
 * s->motion_val.
 *
 * @param s     encoder context
 * @param xmin,ymin,xmax,ymax vector limits, used unchanged for all blocks
 * @param mx    x component of the 16x16 vector, used as a predictor
 * @param my    y component of the 16x16 vector, used as a predictor
 * @param shift vectors in motion_val are compared against limits << shift
 * @return sum of the four block scores returned by sub_motion_search
 */
static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift)
{
    int block;
    int P[10][2];
    int dmin_sum=0;
    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        /* offset from the entry above the block to its top-right predictor */
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->block_wrap[0];
        const int mot_xy = s->block_index[block];
        /* the macroblock limits are used as they are for every block: this
           saves a bit of clipping work and should not affect compression in
           a negative way */
        const int rel_xmin4= xmin;
        const int rel_xmax4= xmax;
        const int rel_ymin4= ymin;
        const int rel_ymax4= ymax;

        P_LEFT[0] = s->motion_val[mot_xy - 1][0];
        P_LEFT[1] = s->motion_val[mot_xy - 1][1];

        if(P_LEFT[0]       > (rel_xmax4<<shift)) P_LEFT[0]       = (rel_xmax4<<shift);

        /* special case for first line */
        if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
            pred_x4= P_LEFT[0];
            pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->motion_val[mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->motion_val[mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > (rel_ymax4<<shift)) P_TOP[1]     = (rel_ymax4<<shift);
            if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift);
            if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift);
            if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift);

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            if(s->out_format == FMT_H263){
                pred_x4 = P_MEDIAN[0];
                pred_y4 = P_MEDIAN[1];
            }else { /* mpeg1 at least */
                pred_x4= P_LEFT[0];
                pred_y4= P_LEFT[1];
            }
        }
        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
                                       &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);

        dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
                                          pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);

        s->motion_val[ s->block_index[block] ][0]= mx4;
        s->motion_val[ s->block_index[block] ][1]= my4;
        dmin_sum+= dmin4;
    }
    return dmin_sum;
}
847

    
848
void ff_estimate_p_frame_motion(MpegEncContext * s,
849
                                int mb_x, int mb_y)
850
{
851
    UINT8 *pix, *ppix;
852
    int sum, varc, vard, mx, my, range, dmin, xx, yy;
853
    int xmin, ymin, xmax, ymax;
854
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
855
    int pred_x=0, pred_y=0;
856
    int P[10][2];
857
    const int shift= 1+s->quarter_sample;
858
    int mb_type=0;
859
    uint8_t *ref_picture= s->last_picture.data[0];
860
    Picture * const pic= &s->current_picture;
861
    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
862
    
863
    assert(s->quarter_sample==0 || s->quarter_sample==1);
864

    
865
    s->me.penalty_factor    = get_penalty_factor(s, s->avctx->me_cmp);
866
    s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
867

    
868
    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
869
    rel_xmin= xmin - mb_x*16;
870
    rel_xmax= xmax - mb_x*16;
871
    rel_ymin= ymin - mb_y*16;
872
    rel_ymax= ymax - mb_y*16;
873
    s->me.skip=0;
874

    
875
    switch(s->me_method) {
876
    case ME_ZERO:
877
    default:
878
        no_motion_search(s, &mx, &my);
879
        mx-= mb_x*16;
880
        my-= mb_y*16;
881
        dmin = 0;
882
        break;
883
    case ME_FULL:
884
        dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
885
        mx-= mb_x*16;
886
        my-= mb_y*16;
887
        break;
888
    case ME_LOG:
889
        dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
890
        mx-= mb_x*16;
891
        my-= mb_y*16;
892
        break;
893
    case ME_PHODS:
894
        dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
895
        mx-= mb_x*16;
896
        my-= mb_y*16;
897
        break;
898
    case ME_X1:
899
    case ME_EPZS:
900
       {
901
            const int mot_stride = s->block_wrap[0];
902
            const int mot_xy = s->block_index[0];
903

    
904
            P_LEFT[0]       = s->motion_val[mot_xy - 1][0];
905
            P_LEFT[1]       = s->motion_val[mot_xy - 1][1];
906

    
907
            if(P_LEFT[0]       > (rel_xmax<<shift)) P_LEFT[0]       = (rel_xmax<<shift);
908

    
909
            /* special case for first line */
910
            if ((mb_y == 0 || s->first_slice_line)) {
911
                pred_x= P_LEFT[0];
912
                pred_y= P_LEFT[1];
913
            } else {
914
                P_TOP[0]      = s->motion_val[mot_xy - mot_stride    ][0];
915
                P_TOP[1]      = s->motion_val[mot_xy - mot_stride    ][1];
916
                P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + 2][0];
917
                P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + 2][1];
918
                if(P_TOP[1]      > (rel_ymax<<shift)) P_TOP[1]     = (rel_ymax<<shift);
919
                if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift);
920
                if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift);
921
        
922
                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
923
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
924

    
925
                if(s->out_format == FMT_H263){
926
                    pred_x = P_MEDIAN[0];
927
                    pred_y = P_MEDIAN[1];
928
                }else { /* mpeg1 at least */
929
                    pred_x= P_LEFT[0];
930
                    pred_y= P_LEFT[1];
931
                }
932
            }
933
        }
934
        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
935
                                      &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty);
936
 
937
        break;
938
    }
939

    
940
    /* intra / predictive decision */
941
    xx = mb_x * 16;
942
    yy = mb_y * 16;
943

    
944
    pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
945
    /* At this point (mx,my) are full-pell and the relative displacement */
946
    ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
947
    
948
    sum = s->dsp.pix_sum(pix, s->linesize);
949
    
950
    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
951
    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;
952

    
953
//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
954
    pic->mb_var   [s->mb_width * mb_y + mb_x] = varc;
955
    pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
956
    pic->mb_mean  [s->mb_width * mb_y + mb_x] = (sum+128)>>8;
957
    pic->mb_var_sum    += varc;
958
    pic->mc_mb_var_sum += vard;
959
//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
960
    
961
#if 0
962
    printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
963
           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
964
#endif
965
    if(s->flags&CODEC_FLAG_HQ){
966
        if (vard <= 64 || vard < varc)
967
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
968
        else
969
            s->scene_change_score+= s->qscale;
970

    
971
        if (vard*2 + 200 > varc)
972
            mb_type|= MB_TYPE_INTRA;
973
        if (varc*2 + 200 > vard){
974
            mb_type|= MB_TYPE_INTER;
975
            s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
976
                                   pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
977
        }else{
978
            mx <<=shift;
979
            my <<=shift;
980
        }
981
        if((s->flags&CODEC_FLAG_4MV)
982
           && !s->me.skip && varc>50 && vard>10){
983
            mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
984
            mb_type|=MB_TYPE_INTER4V;
985

    
986
            set_p_mv_tables(s, mx, my, 0);
987
        }else
988
            set_p_mv_tables(s, mx, my, 1);
989
    }else{
990
        if (vard <= 64 || vard < varc) {
991
//        if (sadP <= 32 || sadP < sadI + 500) {
992
            s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
993
            mb_type|= MB_TYPE_INTER;
994
            if (s->me_method != ME_ZERO) {
995
                dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
996
                                            pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
997
                if((s->flags&CODEC_FLAG_4MV)
998
                   && !s->me.skip && varc>50 && vard>10){
999
                    int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1000
                    if(dmin4 + 128 <dmin)
1001
                        mb_type= MB_TYPE_INTER4V;
1002
                }
1003
                set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
1004

    
1005
            } else {
1006
                mx <<=shift;
1007
                my <<=shift;
1008
            }
1009
#if 0
1010
            if (vard < 10) {
1011
                skip++;
1012
                fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d", 
1013
                                skip, vard, varc, dmin);
1014
            }
1015
#endif
1016
        }else{
1017
            s->scene_change_score+= 20;
1018
            mb_type|= MB_TYPE_INTRA;
1019
            mx = 0;
1020
            my = 0;
1021
        }
1022
    }
1023

    
1024
    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1025
}
1026

    
1027
/*
 * Search the motion vector of macroblock (mb_x, mb_y) against one reference
 * picture for B-frame coding.
 *
 * mv_table supplies the neighbouring vectors used as predictors (ME_X1 /
 * ME_EPZS only) and receives the resulting vector in this macroblock's slot.
 * picture is the reference that is searched; f_code selects the mv_penalty
 * table and is forwarded to get_limits().
 *
 * Side effects: overwrites s->me.penalty_factor and s->me.sub_penalty_factor.
 * Returns the score reported by the final sub_motion_search() pass.
 */
int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
{
    const int shift      = 1 + s->quarter_sample;
    const int mot_stride = s->mb_width + 2;
    const int mot_xy     = (mb_y + 1)*mot_stride + mb_x + 1;
    uint8_t  * const ref_picture = picture->data[0];
    uint16_t * const mv_penalty  = s->me.mv_penalty[f_code] + MAX_MV;
    int P[10][2];               /* must be called P: the P_LEFT/P_TOP/... macros index it */
    int range, xmin, ymin, xmax, ymax;
    int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
    int pred_x = 0, pred_y = 0;
    int mx, my, dmin;
    int mv_scale;
    int result_is_absolute = 1; /* cleared by the searches that already return relative vectors */

    s->me.penalty_factor     = get_penalty_factor(s, s->avctx->me_cmp);
    s->me.sub_penalty_factor = get_penalty_factor(s, s->avctx->me_sub_cmp);

    /* search window, first in picture coordinates, then relative to this MB */
    get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
    rel_xmin = xmin - mb_x*16;
    rel_xmax = xmax - mb_x*16;
    rel_ymin = ymin - mb_y*16;
    rel_ymax = ymax - mb_y*16;

    switch (s->me_method) {
    case ME_FULL:
        dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture);
        break;
    case ME_LOG:
        dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        break;
    case ME_PHODS:
        dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture);
        break;
    case ME_X1:
    case ME_EPZS:
        /* left neighbour is always used as a predictor */
        P_LEFT[0] = mv_table[mot_xy - 1][0];
        P_LEFT[1] = mv_table[mot_xy - 1][1];
        if (P_LEFT[0] > (rel_xmax<<shift))
            P_LEFT[0] = (rel_xmax<<shift);

        /* top / top-right / median predictors only exist below the first line */
        if (mb_y != 0 && !s->first_slice_line) {
            P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
            P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
            P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
            P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];

            if (P_TOP[1]      > (rel_ymax<<shift)) P_TOP[1]      = (rel_ymax<<shift);
            if (P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0] = (rel_xmin<<shift);
            if (P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1] = (rel_ymax<<shift);

            P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
        }
        pred_x = P_LEFT[0];
        pred_y = P_LEFT[1];

        /* scale factor handed to the search; depends on which table is being filled */
        if (mv_table == s->b_forw_mv_table) {
            mv_scale = (s->pb_time<<16) / (s->pp_time<<shift);
        } else {
            mv_scale = ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
        }

        dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y,
                                      rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                      picture, s->p_mv_table, mv_scale, mv_penalty);
        result_is_absolute = 0;
        break;
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin = 0;
        break;
    }

    /* rebase the result of the non-EPZS searches onto this macroblock */
    if (result_is_absolute) {
        mx -= mb_x*16;
        my -= mb_y*16;
    }

    dmin = s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
                                   pred_x, pred_y, picture, 0, 0, mv_penalty);

    mv_table[mot_xy][0] = mx;
    mv_table[mot_xy][1] = my;

    return dmin;
}
1121

    
1122
/*
 * Score one bidirectional candidate for macroblock (mb_x, mb_y).
 *
 * (motion_fx, motion_fy) is applied to s->last_picture and
 * (motion_bx, motion_by) to s->next_picture; the vectors are in quarter-pel
 * units when s->quarter_sample is set, half-pel units otherwise.
 * The forward block is written to s->me.scratchpad with a put_* function and
 * the backward block is then combined into it with the matching avg_*
 * function.
 *
 * Returns the vector cost (mv_penalty of each component relative to its
 * predictor, times s->me.sub_penalty_factor) plus the me_sub_cmp score of the
 * source macroblock against the scratchpad prediction.
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int mb_x, int mb_y,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by)
{
    //FIXME optimize?
    //FIXME move into template?
    //FIXME better f_code prediction (max mv & distance)
    uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
    uint8_t *dest_y = s->me.scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;

    if(s->quarter_sample){
        /* forward prediction: low 2 bits select the qpel filter, the rest is the full-pel offset */
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = mb_x * 16 + (motion_fx >> 2);
        src_y = mb_y * 16 + (motion_fy >> 2);
        assert(src_x >=-16 && src_x<=s->width);
        assert(src_y >=-16 && src_y<=s->height);

        ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
        s->dsp.put_qpel_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize);

        /* backward prediction, combined into the same buffer */
        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = mb_x * 16 + (motion_bx >> 2);
        src_y = mb_y * 16 + (motion_by >> 2);
        assert(src_x >=-16 && src_x<=s->width);
        assert(src_y >=-16 && src_y<=s->height);

        ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
        s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize);
    }else{
        /* forward prediction: low bit selects the hpel filter, the rest is the full-pel offset */
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = mb_x * 16 + (motion_fx >> 1);
        src_y = mb_y * 16 + (motion_fy >> 1);
        assert(src_x >=-16 && src_x<=s->width);
        assert(src_y >=-16 && src_y<=s->height);

        ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
        s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);

        /* backward prediction, combined into the same buffer */
        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = mb_x * 16 + (motion_bx >> 1);
        src_y = mb_y * 16 + (motion_by >> 1);
        assert(src_x >=-16 && src_x<=s->width);
        assert(src_y >=-16 && src_y<=s->height);

        ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
        s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
    }

    /* The three terms form ONE expression. A stray ';' after the second term
     * used to cut the comparison result off, so only the vector cost was
     * returned and the prediction quality was ignored. */
    fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor
           +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor
           + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);

    return fbmin;
}
1183

    
1184
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1185
static inline int bidir_refine(MpegEncContext * s,
1186
                                  int mb_x, int mb_y)
1187
{
1188
    const int mot_stride = s->mb_width + 2;
1189
    const int xy = (mb_y + 1)*mot_stride + mb_x + 1;
1190
    int fbmin;
1191
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
1192
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
1193
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
1194
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
1195
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
1196
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
1197
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
1198
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1199

    
1200
    //FIXME do refinement and add flag
1201
    
1202
    fbmin= check_bidir_mv(s, mb_x, mb_y, 
1203
                          motion_fx, motion_fy,
1204
                          motion_bx, motion_by,
1205
                          pred_fx, pred_fy,
1206
                          pred_bx, pred_by);
1207

    
1208
   return fbmin;
1209
}
1210

    
1211
/*
 * Direct-mode search for macroblock (mb_x, mb_y).
 *
 * Fills s->me.co_located_mv[] and s->me.direct_basis_mv[] from
 * s->motion_val[] (one entry when the co-located macroblock is 16x16, four
 * when it is 4MV), derives the window in which the direct delta vector may
 * move, runs the direct EPZS search followed by the matching sub-pel search,
 * and stores the resulting delta in s->b_direct_mv_table.
 *
 * Side effect: sets s->mv_type to MV_TYPE_8X8 or MV_TYPE_16X16.
 * Returns the search score, or 256*256*256*64 (with a zero vector stored)
 * when the window is empty and direct mode cannot be used.
 */
static inline int direct_search(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    int P[10][2];               /* must be called P: the P_LEFT/P_TOP/... macros index it */
    const int shift      = 1 + s->quarter_sample;
    const int mot_stride = s->mb_width + 2;
    const int mot_xy     = (mb_y + 1)*mot_stride + mb_x + 1;
    const int time_pp    = s->pp_time;
    const int time_pb    = s->pb_time;
    int16_t (*mv_table)[2] = s->b_direct_mv_table;
    uint16_t * const mv_penalty = s->me.mv_penalty[1] + MAX_MV;
    int xmin, xmax, ymin, ymax;
    int mx, my, dmin;
    int blk;

    /* predictors taken from the already estimated direct vectors */
    P_LEFT[0] = mv_table[mot_xy - 1][0];
    P_LEFT[1] = mv_table[mot_xy - 1][1];

    /* top / top-right / median predictors only exist below the first line */
    if (mb_y != 0 && !s->first_slice_line) {
        P_TOP[0]      = mv_table[mot_xy - mot_stride    ][0];
        P_TOP[1]      = mv_table[mot_xy - mot_stride    ][1];
        P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1][0];
        P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1][1];

        P_MEDIAN[0] = mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1] = mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    /* initial window for the delta vector */
    xmin = (-32)>>shift;
    ymin = xmin;
    xmax = 31>>shift;
    ymax = xmax;

    if (s->co_located_type_table[mb_x + mb_y*s->mb_width] == CO_LOCATED_TYPE_4MV) {
        s->mv_type = MV_TYPE_8X8;
    } else {
        s->mv_type = MV_TYPE_16X16;
    }

    for (blk = 0; blk < 4; blk++) {
        const int index = s->block_index[blk];
        const int col   = blk & 1;      /* 8x8 block column inside the MB */
        const int row   = blk >> 1;     /* 8x8 block row inside the MB    */
        int lo, hi;

        s->me.co_located_mv[blk][0] = s->motion_val[index][0];
        s->me.co_located_mv[blk][1] = s->motion_val[index][1];
        s->me.direct_basis_mv[blk][0] = s->me.co_located_mv[blk][0]*time_pb/time_pp + (col<<(shift+3));
        s->me.direct_basis_mv[blk][1] = s->me.co_located_mv[blk][1]*time_pb/time_pp + (row<<(shift+3));
//        s->me.direct_basis_mv[1][i][0]= s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
//        s->me.direct_basis_mv[1][i][1]= s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);

        /* horizontal reach of the basis vector and of (basis - co-located) */
        hi = FFMAX(s->me.direct_basis_mv[blk][0], s->me.direct_basis_mv[blk][0] - s->me.co_located_mv[blk][0])>>shift;
        lo = FFMIN(s->me.direct_basis_mv[blk][0], s->me.direct_basis_mv[blk][0] - s->me.co_located_mv[blk][0])>>shift;
        hi += (2*mb_x + col)*8 - 1; // +-1 is for the simpler rounding
        lo += (2*mb_x + col)*8 + 1;
        if (hi >= s->width)
            xmax = s->width - hi - 1;
        if (lo < -16)
            xmin = - 32 - lo;

        /* same for the vertical direction */
        hi = FFMAX(s->me.direct_basis_mv[blk][1], s->me.direct_basis_mv[blk][1] - s->me.co_located_mv[blk][1])>>shift;
        lo = FFMIN(s->me.direct_basis_mv[blk][1], s->me.direct_basis_mv[blk][1] - s->me.co_located_mv[blk][1])>>shift;
        hi += (2*mb_y + row)*8 - 1; // +-1 is for the simpler rounding
        lo += (2*mb_y + row)*8 + 1;
        if (hi >= s->height)
            ymax = s->height - hi - 1;
        if (lo < -16)
            ymin = - 32 - lo;

        /* a 16x16 co-located MB has a single vector: block 0 is all there is */
        if (s->mv_type == MV_TYPE_16X16)
            break;
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* empty window: the zero delta is not reachable, direct mode is unusable */
    if (xmax < 0 || xmin > 0 || ymax < 0 || ymin > 0) {
        s->b_direct_mv_table[mot_xy][0] = 0;
        s->b_direct_mv_table[mot_xy][1] = 0;

        return 256*256*256*64;
    }

    if (s->flags & CODEC_FLAG_QPEL) {
        dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
                                                     &s->last_picture, mv_table, 1<<14, mv_penalty);
        dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
                                                0, 0, &s->last_picture, 0, 0, mv_penalty);
    } else {
        dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
                                                     &s->last_picture, mv_table, 1<<15, mv_penalty);
        dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
                                                0, 0, &s->last_picture, 0, 0, mv_penalty);
    }

    s->b_direct_mv_table[mot_xy][0] = mx;
    s->b_direct_mv_table[mot_xy][1] = my;
    return dmin;
}
1302

    
1303
/*
 * Run all B-frame motion searches for macroblock (mb_x, mb_y) and record
 * the chosen macroblock type.
 *
 * Direct, forward, backward and bidirectional candidates are scored; the
 * lowest score selects the type (ties keep the earlier candidate in the
 * order direct, forward, backward, bidir).  The squared best score, scaled
 * down by 2^16 with rounding, is accumulated into
 * s->current_picture.mc_mb_var_sum and stored in
 * s->current_picture.mc_mb_var[].
 *
 * With CODEC_FLAG_HQ every type is flagged as a candidate instead, except
 * that direct mode is dropped when its search reported an invalid score.
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    /* NOTE(review): sampled before the ff_estimate_motion_b() calls below,
     * which overwrite s->me.penalty_factor -- confirm this is intended. */
    const int penalty_factor= s->me.penalty_factor;
    int fmin, bmin, dmin, fbmin;
    int type=0;

    dmin= direct_search(s, mb_x, mb_y);

    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code);
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor;
//printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);

    fbmin= bidir_refine(s, mb_x, mb_y);

    {
        int score= dmin;
        type=MB_TYPE_DIRECT;

        if(fmin<score){
            score=fmin;
            type= MB_TYPE_FORWARD;
        }
        if(bmin<score){
            score=bmin;
            type= MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= MB_TYPE_BIDIR;
        }
        /* Square in unsigned arithmetic: a signed score*score overflows
         * (undefined behaviour) as soon as score exceeds 46340.  The result
         * is unchanged for every value the signed form handled. */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        s->current_picture.mc_mb_var_sum += score;
        s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD
    }

    if(s->flags&CODEC_FLAG_HQ){
        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
        if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
    }

    s->mb_type[mb_y*s->mb_width + mb_x]= type;
}
1346

    
1347
/* find best f_code for ME which do unlimited searches */
1348
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
1349
{
1350
    if(s->me_method>=ME_EPZS){
1351
        int score[8];
1352
        int i, y;
1353
        UINT8 * fcode_tab= s->fcode_tab;
1354
        int best_fcode=-1;
1355
        int best_score=-10000000;
1356

    
1357
        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
1358

    
1359
        for(y=0; y<s->mb_height; y++){
1360
            int x;
1361
            int xy= (y+1)* (s->mb_width+2) + 1;
1362
            i= y*s->mb_width;
1363
            for(x=0; x<s->mb_width; x++){
1364
                if(s->mb_type[i] & type){
1365
                    int fcode= FFMAX(fcode_tab[mv_table[xy][0] + MAX_MV],
1366
                                     fcode_tab[mv_table[xy][1] + MAX_MV]);
1367
                    int j;
1368
                    
1369
                    for(j=0; j<fcode && j<8; j++){
1370
                        if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[i] < s->current_picture.mb_var[i])
1371
                            score[j]-= 170;
1372
                    }
1373
                }
1374
                i++;
1375
                xy++;
1376
            }
1377
        }
1378
        
1379
        for(i=1; i<8; i++){
1380
            if(score[i] > best_score){
1381
                best_score= score[i];
1382
                best_fcode= i;
1383
            }
1384
//            printf("%d %d\n", i, score[i]);
1385
        }
1386

    
1387
//    printf("fcode: %d type: %d\n", i, s->pict_type);
1388
        return best_fcode;
1389
/*        for(i=0; i<=MAX_FCODE; i++){
1390
            printf("%d ", mv_num[i]);
1391
        }
1392
        printf("\n");*/
1393
    }else{
1394
        return 1;
1395
    }
1396
}
1397

    
1398
/*
 * Turn P-frame macroblocks whose vectors cannot be coded with s->f_code
 * into intra candidates.
 *
 * A vector component is rejected when its s->fcode_tab entry is 0 or larger
 * than s->f_code.  For MB_TYPE_INTER the flag is replaced by MB_TYPE_INTRA
 * and the entry in s->p_mv_table is zeroed.  With CODEC_FLAG_4MV the same
 * test is applied to the four 8x8 vectors in s->motion_val of every
 * MB_TYPE_INTER4V macroblock (the vectors themselves are left untouched).
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    const int f_code = s->f_code;
    UINT8 * fcode_tab = s->fcode_tab;
    int x, y;

    /* 16x16 vectors */
    for (y = 0; y < s->mb_height; y++) {
        int xy = (y+1)* (s->mb_width+2)+1;      /* index into the padded p_mv_table */
        int mb = y*s->mb_width;                 /* index into mb_type               */

        for (x = 0; x < s->mb_width; x++, xy++, mb++) {
            int too_long;

            if (!(s->mb_type[mb] & MB_TYPE_INTER))
                continue;

            too_long =    fcode_tab[s->p_mv_table[xy][0] + MAX_MV] > f_code
                       || fcode_tab[s->p_mv_table[xy][0] + MAX_MV] == 0
                       || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] > f_code
                       || fcode_tab[s->p_mv_table[xy][1] + MAX_MV] == 0;
            if (!too_long)
                continue;

            s->mb_type[mb] &= ~MB_TYPE_INTER;
            s->mb_type[mb] |= MB_TYPE_INTRA;
            s->p_mv_table[xy][0] = 0;
            s->p_mv_table[xy][1] = 0;
        }
    }

    if (!(s->flags & CODEC_FLAG_4MV))
        return;

    /* 8x8 vectors */
    {
        const int wrap = 2+ s->mb_width*2;      /* row stride of motion_val */

        for (y = 0; y < s->mb_height; y++) {
            int xy = (y*2 + 1)*wrap + 1;
            int mb = y*s->mb_width;

            for (x = 0; x < s->mb_width; x++, xy += 2, mb++) {
                int block;

                if (!(s->mb_type[mb] & MB_TYPE_INTER4V))
                    continue;

                for (block = 0; block < 4; block++) {
                    const int off = (block& 1) + (block>>1)*wrap;
                    const int mx  = s->motion_val[ xy + off ][0];
                    const int my  = s->motion_val[ xy + off ][1];

                    if (   fcode_tab[mx + MAX_MV] > f_code
                        || fcode_tab[mx + MAX_MV] == 0
                        || fcode_tab[my + MAX_MV] > f_code
                        || fcode_tab[my + MAX_MV] == 0) {
                        s->mb_type[mb] &= ~MB_TYPE_INTER4V;
                        s->mb_type[mb] |= MB_TYPE_INTRA;
                    }
                }
            }
        }
    }
}
1462

    
1463
/*
 * Clip every vector in mv_table so that it can be coded with f_code.
 *
 * A component whose s->fcode_tab entry is 0 or larger than f_code is
 * replaced by (16<<f_code)-1 when it is positive and by -(16<<f_code)
 * otherwise.  All macroblocks are processed; the `type` argument is
 * currently not used.
 */
void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type)
{
    UINT8 * fcode_tab = s->fcode_tab;
    int x, y;

    for (y = 0; y < s->mb_height; y++) {
        int xy = (y+1)* (s->mb_width+2)+1;      /* index into the padded mv_table */

        for (x = 0; x < s->mb_width; x++, xy++) {
            int c;

            /* x component first, then y */
            for (c = 0; c < 2; c++) {
                if (   fcode_tab[mv_table[xy][c] + MAX_MV] > f_code
                    || fcode_tab[mv_table[xy][c] + MAX_MV] == 0) {
                    if (mv_table[xy][c] > 0)
                        mv_table[xy][c] =  (16<<f_code)-1;
                    else
                        mv_table[xy][c] = -(16<<f_code);
                }
            }
        }
    }
}