Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est.c @ 2912e87a

History | View | Annotate | Download (78.1 KB)

1
/*
2
 * Motion estimation
3
 * Copyright (c) 2000,2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer
5
 *
6
 * new motion estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
7
 *
8
 * This file is part of Libav.
9
 *
10
 * Libav is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * Libav is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with Libav; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24

    
25
/**
26
 * @file
27
 * Motion estimation.
28
 */
29

    
30
#include <stdlib.h>
31
#include <stdio.h>
32
#include <limits.h>
33
#include "libavutil/intmath.h"
34
#include "avcodec.h"
35
#include "dsputil.h"
36
#include "mathops.h"
37
#include "mpegvideo.h"
38

    
39
#undef NDEBUG
40
#include <assert.h>
41

    
42
#define SQ(a) ((a)*(a))
43

    
44
#define P_LEFT P[1]
45
#define P_TOP P[2]
46
#define P_TOPRIGHT P[3]
47
#define P_MEDIAN P[4]
48
#define P_MV1 P[9]
49

    
50
static inline int sad_hpel_motion_search(MpegEncContext * s,
51
                                  int *mx_ptr, int *my_ptr, int dmin,
52
                                  int src_index, int ref_index,
53
                                  int size, int h);
54

    
55
/**
 * Advance c->map_generation by one generation step and return the new value.
 *
 * One step is 1<<(ME_MAP_MV_BITS*2). If the addition leaves the counter at 0,
 * c->map is cleared (ME_MAP_SIZE uint32_t entries) and the counter restarts
 * at one step.
 */
static inline int update_map_generation(MotionEstContext *c)
{
    const int generation_step= 1<<(ME_MAP_MV_BITS*2);

    c->map_generation+= generation_step;
    if(!c->map_generation){
        memset(c->map, 0, ME_MAP_SIZE*sizeof(uint32_t));
        c->map_generation= generation_step;
    }
    return c->map_generation;
}
64

    
65
/* shape adaptive search stuff */
typedef struct Minima{
    int height;
    int x, y;
    int checked;
}Minima;

/**
 * qsort()-style comparator ordering Minima entries by ascending height.
 *
 * @param a pointer to the first Minima
 * @param b pointer to the second Minima
 * @return a negative value, zero or a positive value if the height of a is
 *         less than, equal to or greater than the height of b
 */
static int minima_cmp(const void *a, const void *b){
    const Minima *da = a;
    const Minima *db = b;

    /* Compare instead of subtracting: da->height - db->height overflows
     * (undefined behaviour, wrong sign) for large heights of opposite sign. */
    return (da->height > db->height) - (da->height < db->height);
}
78

    
79
#define FLAG_QPEL   1 //must be 1
80
#define FLAG_CHROMA 2
81
#define FLAG_DIRECT 4
82

    
83
/**
 * Point the source and reference plane pointers of the context at (x, y).
 *
 * Plane 0 uses the offset y*c->stride + x; planes 1 and 2 both use
 * (y*c->uvstride + x)>>1. c->src[0] and c->ref[0] are always set; when
 * ref_index is non-zero, c->ref[ref_index] is additionally set from ref2.
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_off  =  y*c->  stride + x;
    const int chroma_off= (y*c->uvstride + x)>>1;
    int plane;

    for(plane=0; plane<3; plane++){
        const int off= plane ? chroma_off : luma_off;

        c->src[0][plane]= src[plane] + off;
        c->ref[0][plane]= ref[plane] + off;
        if(ref_index)
            c->ref[ref_index][plane]= ref2[plane] + off;
    }
}
100

    
101
/**
 * Build the FLAG_* bit set used by the compare functions.
 *
 * FLAG_QPEL is taken from CODEC_FLAG_QPEL in c->avctx->flags, FLAG_DIRECT
 * and FLAG_CHROMA from the (boolean) direct and chroma arguments.
 */
static int get_flags(MotionEstContext *c, int direct, int chroma){
    int flags= 0;

    if(c->avctx->flags&CODEC_FLAG_QPEL)
        flags|= FLAG_QPEL;
    if(direct)
        flags|= FLAG_DIRECT;
    if(chroma)
        flags|= FLAG_CHROMA;

    return flags;
}
106

    
107
/**
 * Score a candidate vector for a "direct" block.
 *
 * The prediction is assembled in c->temp: a "put" from ref[0] at the forward
 * vector (direct_basis_mv + candidate) followed by an "avg" from ref[8] at
 * the backward vector, either as four 8x8 blocks (MV_TYPE_8X8) or as one
 * 16x16 block. The result is compared against src[0] over 16 rows with
 * cmp_func.
 *
 * @param x,y        full-pel candidate position
 * @param subx,suby  sub-pel fraction of the candidate
 * @param qpel       non-zero for quarter-pel, zero for half-pel resolution
 * @return the cmp_func score, or 256*256*256*32 if the candidate lies
 *         outside [xmin,xmax] x [ymin,ymax]
 *
 * size, h and chroma_cmp_func are accepted for signature compatibility with
 * cmp_inline() but are not used here.
 */
static av_always_inline int cmp_direct_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    /* Scale by multiplication: x, y and the search limits can be negative and
     * left-shifting a negative value is undefined behaviour. */
    const int subpel_scale= 1<<(1+qpel);
    const int hx= subx + x*subpel_scale;
    const int hy= suby + y*subpel_scale;
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    const int in_range=    x >= c->xmin && hx <= c->xmax*subpel_scale
                        && y >= c->ymin && hy <= c->ymax*subpel_scale;
    int d;
    //FIXME check chroma 4mv, (no crashes ...)
    assert(in_range);
    if(in_range){
        const int time_pp= s->pp_time;
        const int time_pb= s->pb_time;
        const int mask= 2*qpel+1;
        /* NOTE(review): ref[8] indexes past the row selected by ref_index;
         * presumably it addresses the backward reference stored further on
         * in c->ref - confirm against the MotionEstContext layout. */
        if(s->mv_type==MV_TYPE_8X8){
            int i;
            for(i=0; i<4; i++){
                int fx = c->direct_basis_mv[i][0] + hx;
                int fy = c->direct_basis_mv[i][1] + hy;
                int bx = hx ? fx - c->co_located_mv[i][0] : c->co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(qpel+4));
                int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

                uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                if(qpel){
                    c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
                    c->qpel_avg[1][bxy](dst, ref[8] + (bx>>2) + (by>>2)*stride, stride);
                }else{
                    c->hpel_put[1][fxy](dst, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 8);
                    c->hpel_avg[1][bxy](dst, ref[8] + (bx>>1) + (by>>1)*stride, stride, 8);
                }
            }
        }else{
            int fx = c->direct_basis_mv[0][0] + hx;
            int fy = c->direct_basis_mv[0][1] + hy;
            int bx = hx ? fx - c->co_located_mv[0][0] : (c->co_located_mv[0][0]*(time_pb - time_pp)/time_pp);
            int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
            int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
            int bxy= (bx&mask) + ((by&mask)<<(qpel+1));

            if(qpel){
                /* the 8x8 qpel functions are applied to each quadrant of the 16x16 block */
                c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
                c->qpel_put[1][fxy](c->temp     + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride     + 8*stride, stride);
                c->qpel_put[1][fxy](c->temp + 8 + 8*stride, ref[0] + (fx>>2) + (fy>>2)*stride + 8 + 8*stride, stride);
                c->qpel_avg[1][bxy](c->temp               , ref[8] + (bx>>2) + (by>>2)*stride               , stride);
                c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
            }else{
                assert((fx>>1) + 16*s->mb_x >= -16);
                assert((fy>>1) + 16*s->mb_y >= -16);
                assert((fx>>1) + 16*s->mb_x <= s->width);
                assert((fy>>1) + 16*s->mb_y <= s->height);
                assert((bx>>1) + 16*s->mb_x >= -16);
                assert((by>>1) + 16*s->mb_y >= -16);
                assert((bx>>1) + 16*s->mb_x <= s->width);
                assert((by>>1) + 16*s->mb_y <= s->height);

                c->hpel_put[0][fxy](c->temp, ref[0] + (fx>>1) + (fy>>1)*stride, stride, 16);
                c->hpel_avg[0][bxy](c->temp, ref[8] + (bx>>1) + (by>>1)*stride, stride, 16);
            }
        }
        d = cmp_func(s, c->temp, src[0], stride, 16);
    }else
        d= 256*256*256*32;
    return d;
}
178

    
179
/**
 * Score a candidate at full-pel position (x, y) plus sub-pel fraction
 * (subx, suby).
 *
 * If the sub-pel index dxy is non-zero, luma is interpolated into c->temp
 * (qpel_put or hpel_put) and compared against src[0]; otherwise src[0] is
 * compared directly against ref[0] at (x, y). With chroma set, both chroma
 * planes are interpolated into c->temp + 16*stride with hpel_put[size+1] and
 * their chroma_cmp_func scores are added.
 *
 * @param size  index into the put-function tables
 * @param h     number of luma rows compared (h>>1 for chroma)
 * @param qpel  non-zero for quarter-pel, zero for half-pel resolution
 * @return the accumulated comparison score
 */
static av_always_inline int cmp_inline(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, int qpel, int chroma){
    MotionEstContext * const c= &s->me;
    const int stride= c->stride;
    const int uvstride= c->uvstride;
    const int dxy= subx + (suby<<(1+qpel)); //FIXME log2_subpel?
    /* Scale by multiplication: x and y can be negative and left-shifting a
     * negative value is undefined behaviour. */
    const int hx= subx + x*(1<<(1+qpel));
    const int hy= suby + y*(1<<(1+qpel));
    uint8_t * const * const ref= c->ref[ref_index];
    uint8_t * const * const src= c->src[src_index];
    int d;
    int uvdxy= 0;           /* overwritten on every path that has chroma set */
    //FIXME check chroma 4mv, (no crashes ...)
    if(dxy){
        if(qpel){
            c->qpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride); //FIXME prototype (add h)
            if(chroma){
                int cx= hx/2;
                int cy= hy/2;
                cx= (cx>>1)|(cx&1);
                cy= (cy>>1)|(cy&1);
                uvdxy= (cx&1) + 2*(cy&1);
                //FIXME x/y wrong, but mpeg4 qpel is sick anyway, we should drop as much of it as possible in favor for h264
            }
        }else{
            c->hpel_put[size][dxy](c->temp, ref[0] + x + y*stride, stride, h);
            if(chroma)
                uvdxy= dxy | (x&1) | (2*(y&1));
        }
        d = cmp_func(s, c->temp, src[0], stride, h);
    }else{
        d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
        if(chroma)
            uvdxy= (x&1) + 2*(y&1);
    }
    if(chroma){
        uint8_t * const uvtemp= c->temp + 16*stride;
        c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
        c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
        d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
    }
    return d;
}
224

    
225
/**
 * Full-pel, luma-only comparison of a 16-row block:
 * cmp_inline() with subx = suby = 0, size 0, h 16, no qpel and no chroma.
 */
static int cmp_simple(MpegEncContext *s, const int x, const int y,
                      int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func){
    const int subx= 0, suby= 0;
    const int size= 0, h= 16;
    const int qpel= 0, chroma= 0;

    return cmp_inline(s, x, y, subx, suby, size, h, ref_index, src_index,
                      cmp_func, chroma_cmp_func, qpel, chroma);
}
230

    
231
/**
 * Full-pel comparison (subx = suby = 0) dispatching on FLAG_DIRECT.
 *
 * Direct candidates go to cmp_direct_inline() with qpel taken from
 * FLAG_QPEL; all others go to cmp_inline() with qpel 0 and chroma taken
 * from FLAG_CHROMA.
 */
static int cmp_fpel_internal(MpegEncContext *s, const int x, const int y,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,0,0,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
}
240

    
241
/**
 * General comparison dispatching on FLAG_DIRECT.
 *
 * Direct candidates go to cmp_direct_inline(), all others to cmp_inline();
 * qpel and chroma are taken from FLAG_QPEL and FLAG_CHROMA in flags.
 */
static int cmp_internal(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags&FLAG_QPEL);
}
250

    
251
/*! \brief compares a block (either a full macroblock or a partition thereof)
252
    against a proposed motion-compensated prediction of that block
253
 */
254
static av_always_inline int cmp(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
255
                      const int size, const int h, int ref_index, int src_index,
256
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
257
    if(av_builtin_constant_p(flags) && av_builtin_constant_p(h) && av_builtin_constant_p(size)
258
       && av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
259
       && flags==0 && h==16 && size==0 && subx==0 && suby==0){
260
        return cmp_simple(s,x,y,ref_index,src_index, cmp_func, chroma_cmp_func);
261
    }else if(av_builtin_constant_p(subx) && av_builtin_constant_p(suby)
262
       && subx==0 && suby==0){
263
        return cmp_fpel_internal(s,x,y,size,h,ref_index,src_index, cmp_func, chroma_cmp_func,flags);
264
    }else{
265
        return cmp_internal(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, flags);
266
    }
267
}
268

    
269
/**
 * Half-pel comparison: like cmp_internal() but with qpel fixed to 0.
 */
static int cmp_hpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 0);
}
278

    
279
/**
 * Quarter-pel comparison: like cmp_internal() but with qpel fixed to 1.
 */
static int cmp_qpel(MpegEncContext *s, const int x, const int y, const int subx, const int suby,
                      const int size, const int h, int ref_index, int src_index,
                      me_cmp_func cmp_func, me_cmp_func chroma_cmp_func, const int flags){
    if(!(flags&FLAG_DIRECT))
        return cmp_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1, flags&FLAG_CHROMA);

    return cmp_direct_inline(s,x,y,subx,suby,size,h,ref_index,src_index, cmp_func, chroma_cmp_func, 1);
}
288

    
289
#include "motion_est_template.c"
290

    
291
/**
 * Comparison stub: ignores all of its arguments and always scores 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
    return 0;
}
294

    
295
/**
 * Pixel-copy stub: ignores all of its arguments and writes nothing.
 */
static void zero_hpel(uint8_t *a, const uint8_t *b, int stride, int h){
    (void)a;
    (void)b;
    (void)stride;
    (void)h;
}
297

    
298
/**
 * Initialise the motion estimation context s->me from the encoder settings.
 *
 * Validates dia_size/pre_dia_size and me_method, installs the comparison
 * functions, derives the FLAG_* sets, selects the sub-pel search routine and
 * the put/avg tables, and sets the luma/chroma strides.
 *
 * @return 0 on success, -1 if the diamond size or me_method is rejected
 */
int ff_init_me(MpegEncContext *s){
    MotionEstContext * const c= &s->me;
    const int map_cache= FFMIN(ME_MAP_SIZE>>ME_MAP_SHIFT, 1<<ME_MAP_SHIFT);
    const int main_dia = FFABS(s->avctx->dia_size)    &255;
    const int pre_dia  = FFABS(s->avctx->pre_dia_size)&255;
    const int max_dia  = FFMAX(main_dia, pre_dia);
    int i;

    if(FFMIN(s->avctx->dia_size, s->avctx->pre_dia_size) < -ME_MAP_SIZE){
        av_log(s->avctx, AV_LOG_ERROR, "ME_MAP size is too small for SAB diamond\n");
        return -1;
    }
    //special case of snow is needed because snow uses its own iterative ME code
    if(!(   s->me_method==ME_ZERO || s->me_method==ME_EPZS || s->me_method==ME_X1
         || s->avctx->codec_id == CODEC_ID_SNOW)){
        av_log(s->avctx, AV_LOG_ERROR, "me_method is only allowed to be set to zero and epzs; for hex,umh,full and others see dia_size\n");
        return -1;
    }

    c->avctx= s->avctx;

    /* c->stride is only assigned further down, so it is tested here with
     * whatever value it had on entry */
    if(!c->stride && map_cache < 2*max_dia){
        av_log(s->avctx, AV_LOG_INFO, "ME_MAP size may be a little small for the selected diamond size\n");
    }

    ff_set_cmp(&s->dsp, s->dsp.me_pre_cmp, c->avctx->me_pre_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
    ff_set_cmp(&s->dsp, s->dsp.mb_cmp, c->avctx->mb_cmp);

    c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
    c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
    c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);

/*FIXME s->no_rounding b_type*/
    if(s->flags&CODEC_FLAG_QPEL){
        c->sub_motion_search= qpel_motion_search;
        c->qpel_avg= s->dsp.avg_qpel_pixels_tab;
        if(s->no_rounding) c->qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab;
        else               c->qpel_put= s->dsp.put_qpel_pixels_tab;
    }else{
        const int sad_only=    c->avctx->me_sub_cmp == FF_CMP_SAD
                            && c->avctx->    me_cmp == FF_CMP_SAD
                            && c->avctx->    mb_cmp == FF_CMP_SAD;

        /* the dedicated SAD search is used only without chroma and when all
         * three comparison functions are SAD */
        if(sad_only && !(c->avctx->me_sub_cmp&FF_CMP_CHROMA))
            c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
        else
            c->sub_motion_search= hpel_motion_search;
    }
    c->hpel_avg= s->dsp.avg_pixels_tab;
    if(s->no_rounding) c->hpel_put= s->dsp.put_no_rnd_pixels_tab;
    else               c->hpel_put= s->dsp.put_pixels_tab;

    if(!s->linesize){
        c->stride  = 16*s->mb_width + 32;
        c->uvstride=  8*s->mb_width + 16;
    }else{
        c->stride  = s->linesize;
        c->uvstride= s->uvlinesize;
    }

    /* 8x8 fullpel search would need a 4x4 chroma compare, which we do
     * not have yet, and even if we had, the motion estimation code
     * does not expect it. */
    if(s->codec_id != CODEC_ID_SNOW){
        if((c->avctx->me_cmp&FF_CMP_CHROMA)/* && !s->dsp.me_cmp[2]*/){
            s->dsp.me_cmp[2]= zero_cmp;
        }
        if((c->avctx->me_sub_cmp&FF_CMP_CHROMA) && !s->dsp.me_sub_cmp[2]){
            s->dsp.me_sub_cmp[2]= zero_cmp;
        }
        for(i=0; i<4; i++)
            c->hpel_put[2][i]= zero_hpel;
    }

    if(s->codec_id == CODEC_ID_H261){
        c->sub_motion_search= no_sub_motion_search;
    }

    return 0;
}
376

    
377
#if 0
/**
 * Sum of absolute differences between every pixel of a 16x16 block and mean.
 * (Disabled code.)
 */
static int pix_dev(uint8_t * pix, int line_size, int mean)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += FFABS(pix[col]-mean);
        pix += line_size;
    }
    return sum;
}
#endif
400

    
401
/**
 * "Search" that performs no estimation: stores 16*s->mb_x in *mx_ptr and
 * 16*s->mb_y in *my_ptr.
 */
static inline void no_motion_search(MpegEncContext * s,
                                    int *mx_ptr, int *my_ptr)
{
    const int mb_size = 16;

    *mx_ptr = mb_size * s->mb_x;
    *my_ptr = mb_size * s->mb_y;
}
407

    
408
#define Z_THRESHOLD 256
409

    
410
/* Evaluate the half-pel candidate (x, y) with pix_abs and the MV penalty and
 * keep it in dminh/dx/dy if it beats the current best.
 * Relies on s, size, pix, ptr, stride, h, mv_penalty, pen_x, pen_y,
 * penalty_factor, d, dminh, dx and dy of the expanding scope; suffix is
 * unused. Arguments are parenthesised so expressions may be passed safely. */
#define CHECK_SAD_HALF_MV(suffix, x, y) \
{\
    d= s->dsp.pix_abs[size][((x)?1:0)+((y)?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
    d += (mv_penalty[pen_x + (x)] + mv_penalty[pen_y + (y)])*penalty_factor;\
    COPY3_IF_LT(dminh, d, dx, (x), dy, (y))\
}
416

    
417
/**
 * Half-pel refinement of a full-pel vector using the pix_abs functions.
 *
 * On entry *mx_ptr / *my_ptr hold the full-pel vector; on return they hold
 * the vector in half-pel units (or 0,0 if c->skip is set). When the vector
 * lies strictly inside the search limits, the four neighbouring full-pel
 * scores from score_map (top, left, right, bottom) decide which half-pel
 * candidates are evaluated.
 *
 * @param dmin  score of the full-pel vector
 * @return the best score found (dmin if nothing better was found)
 */
static inline int sad_hpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->sub_penalty_factor;
    int mx, my, dminh;
    uint8_t *pix, *ptr;
    int stride= c->stride;
    const int flags= c->sub_flags;
    LOAD_COMMON

    assert(flags == 0);

    if(c->skip){
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    pix = c->src[src_index][0];

    mx = *mx_ptr;
    my = *my_ptr;
    ptr = c->ref[ref_index][0] + (my * stride) + mx;

    dminh = dmin;

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int dx=0, dy=0;
        int d, pen_x, pen_y;
        /* mx/my can be negative: multiply rather than left-shift, which
         * would be undefined behaviour for negative values */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
        mx*= 2;
        my*= 2;

        pen_x= pred_x + mx;
        pen_y= pred_y + my;

        /* ptr points one row up while the y = -1 candidates are checked */
        ptr-= stride;
        if(t<=b){
            CHECK_SAD_HALF_MV(y2 , 0, -1)
            if(l<=r){
                CHECK_SAD_HALF_MV(xy2, -1, -1)
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
            }else{
                CHECK_SAD_HALF_MV(xy2, +1, -1)
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
            }
        }else{
            if(l<=r){
                if(t+l<=b+r){
                    CHECK_SAD_HALF_MV(xy2, -1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, +1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , -1,  0)
                CHECK_SAD_HALF_MV(xy2, -1, +1)
            }else{
                if(t+r<=b+l){
                    CHECK_SAD_HALF_MV(xy2, +1, -1)
                    ptr+= stride;
                }else{
                    ptr+= stride;
                    CHECK_SAD_HALF_MV(xy2, -1, +1)
                }
                CHECK_SAD_HALF_MV(x2 , +1,  0)
                CHECK_SAD_HALF_MV(xy2, +1, +1)
            }
            CHECK_SAD_HALF_MV(y2 ,  0, +1)
        }
        mx+=dx;
        my+=dy;

    }else{
        /* on the border of the search area: no refinement, only rescale */
        mx*= 2;
        my*= 2;
    }

    *mx_ptr = mx;
    *my_ptr = my;
    return dminh;
}
524

    
525
/**
 * Store (mx, my) as the P-frame vector of the current macroblock.
 *
 * s->p_mv_table is always written. If mv4 is non-zero, the vector is also
 * written to the 2x2 group of motion_val[0] entries starting at
 * s->block_index[0] (rows are s->b8_stride apart).
 */
static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
{
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int first, row_stride;
    int row, col;

    s->p_mv_table[mb_xy][0] = mx;
    s->p_mv_table[mb_xy][1] = my;

    /* has already been set to the 4 MV if 4MV is done */
    if(!mv4)
        return;

    first     = s->block_index[0];
    row_stride= s->b8_stride;
    for(row=0; row<2; row++){
        for(col=0; col<2; col++){
            const int mot_xy= first + row*row_stride + col;

            s->current_picture.motion_val[0][mot_xy][0]= mx;
            s->current_picture.motion_val[0][mot_xy][1]= my;
        }
    }
}
548

    
549
/**
550
 * get fullpel ME search limits.
551
 */
552
static inline void get_limits(MpegEncContext *s, int x, int y)
553
{
554
    MotionEstContext * const c= &s->me;
555
    int range= c->avctx->me_range >> (1 + !!(c->flags&FLAG_QPEL));
556
/*
557
    if(c->avctx->me_range) c->range= c->avctx->me_range >> 1;
558
    else                   c->range= 16;
559
*/
560
    if (s->unrestricted_mv) {
561
        c->xmin = - x - 16;
562
        c->ymin = - y - 16;
563
        c->xmax = - x + s->mb_width *16;
564
        c->ymax = - y + s->mb_height*16;
565
    } else if (s->out_format == FMT_H261){
566
        // Search range of H261 is different from other codec standards
567
        c->xmin = (x > 15) ? - 15 : 0;
568
        c->ymin = (y > 15) ? - 15 : 0;
569
        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
570
        c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
571
    } else {
572
        c->xmin = - x;
573
        c->ymin = - y;
574
        c->xmax = - x + s->mb_width *16 - 16;
575
        c->ymax = - y + s->mb_height*16 - 16;
576
    }
577
    if(range){
578
        c->xmin = FFMAX(c->xmin,-range);
579
        c->xmax = FFMIN(c->xmax, range);
580
        c->ymin = FFMAX(c->ymin,-range);
581
        c->ymax = FFMIN(c->ymax, range);
582
    }
583
}
584

    
585
/**
 * Derive the plane-0 ref/src pointers of blocks 1..3 from those of block 0:
 * block 1 is 8 bytes to the right, block 2 is 8 rows (8*stride) down, and
 * block 3 is both.
 */
static inline void init_mv4_ref(MotionEstContext *c){
    uint8_t * const ref0= c->ref[0][0];
    uint8_t * const src0= c->src[0][0];
    const int stride= c->stride;
    int blk;

    for(blk=1; blk<4; blk++){
        const int off= 8*(blk&1) + 8*stride*(blk>>1);

        c->ref[blk][0]= ref0 + off;
        c->src[blk][0]= src0 + off;
    }
}
595

    
596
/**
 * Search one motion vector for each of the four 8x8 blocks of the current
 * macroblock.
 *
 * For every block the predictors are collected from motion_val[0] (left,
 * top, top-right, median), clipped to the search limits scaled by 1<<shift,
 * and passed to epzs_motion_search4() followed by c->sub_motion_search. The
 * resulting vectors are written back to motion_val[0]. If me_sub_cmp[0] and
 * mb_cmp[0] differ, the prediction is rebuilt in c->scratchpad and scored
 * with mb_cmp; with FF_CMP_CHROMA in mb_cmp, a chroma score from
 * s->last_picture is added.
 *
 * @param mx,my  16x16 vector, used as a predictor and to detect the
 *               "all four vectors identical" case
 * @param shift  sub-pel shift applied to the search limits
 * @return INT_MAX if all four block vectors equal (mx, my); otherwise the
 *         summed score, plus 11*mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h=8;
    int block;
    int P[10][2];
    int dmin_sum=0, mx4_sum=0, my4_sum=0;
    int same=1;
    const int stride= c->stride;
    uint8_t *mv_penalty= c->current_mv_penalty;
    /* The limits are scaled by multiplication: c->xmin is set to negative
     * values by get_limits() and left-shifting a negative value is
     * undefined behaviour. */
    const int mv_scale= 1<<shift;

    init_mv4_ref(c);

    for(block=0; block<4; block++){
        int mx4, my4;
        int pred_x4, pred_y4;
        int dmin4;
        static const int off[4]= {2, 1, 1, -1};
        const int mot_stride = s->b8_stride;
        const int mot_xy = s->block_index[block];

        P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
        P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];

        if(P_LEFT[0]       > c->xmax*mv_scale) P_LEFT[0]       = c->xmax*mv_scale;

        /* special case for first line */
        if (s->first_slice_line && block<2) {
            c->pred_x= pred_x4= P_LEFT[0];
            c->pred_y= pred_y4= P_LEFT[1];
        } else {
            P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][0];
            P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride             ][1];
            P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
            P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
            if(P_TOP[1]      > c->ymax*mv_scale) P_TOP[1]     = c->ymax*mv_scale;
            if(P_TOPRIGHT[0] < c->xmin*mv_scale) P_TOPRIGHT[0]= c->xmin*mv_scale;
            if(P_TOPRIGHT[0] > c->xmax*mv_scale) P_TOPRIGHT[0]= c->xmax*mv_scale;
            if(P_TOPRIGHT[1] > c->ymax*mv_scale) P_TOPRIGHT[1]= c->ymax*mv_scale;

            P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
            P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

            c->pred_x= pred_x4 = P_MEDIAN[0];
            c->pred_y= pred_y4 = P_MEDIAN[1];
        }
        P_MV1[0]= mx;
        P_MV1[1]= my;

        dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);

        dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);

        if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
            /* mb_cmp differs from the search metric: rebuild the prediction
             * so the whole macroblock can be scored with mb_cmp below */
            int dxy;
            const int offset= ((block&1) + (block>>1)*stride)*8;
            uint8_t *dest_y = c->scratchpad + offset;
            if(s->quarter_sample){
                uint8_t *ref= c->ref[block][0] + (mx4>>2) + (my4>>2)*stride;
                dxy = ((my4 & 3) << 2) | (mx4 & 3);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y   , ref    , stride);
                else
                    s->dsp.put_qpel_pixels_tab       [1][dxy](dest_y   , ref    , stride);
            }else{
                uint8_t *ref= c->ref[block][0] + (mx4>>1) + (my4>>1)*stride;
                dxy = ((my4 & 1) << 1) | (mx4 & 1);

                if(s->no_rounding)
                    s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y    , ref    , stride, h);
                else
                    s->dsp.put_pixels_tab       [1][dxy](dest_y    , ref    , stride, h);
            }
            dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*c->mb_penalty_factor;
        }else
            dmin_sum+= dmin4;

        if(s->quarter_sample){
            mx4_sum+= mx4/2;
            my4_sum+= my4/2;
        }else{
            mx4_sum+= mx4;
            my4_sum+= my4;
        }

        s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
        s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;

        if(mx4 != mx || my4 != my) same=0;
    }

    if(same)
        return INT_MAX;

    if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
        dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
    }

    if(c->avctx->mb_cmp&FF_CMP_CHROMA){
        int dxy;
        int cmx, cmy;   /* chroma vector; kept distinct from the mx/my parameters */
        int offset;

        cmx= ff_h263_round_chroma(mx4_sum);
        cmy= ff_h263_round_chroma(my4_sum);
        dxy = ((cmy & 1) << 1) | (cmx & 1);

        offset= (s->mb_x*8 + (cmx>>1)) + (s->mb_y*8 + (cmy>>1))*s->uvlinesize;

        if(s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }else{
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
            s->dsp.put_pixels_tab       [1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
        }

        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
        dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
    }

    /* restore the predictor to the 16x16 vector passed in by the caller */
    c->pred_x= mx;
    c->pred_y= my;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}
731

    
732
/**
 * Set up the pointers for the second field: c->ref[ref_index+1] and
 * c->src[1] are placed one line (s->linesize for plane 0, s->uvlinesize for
 * planes 1 and 2) below c->ref[ref_index] and c->src[0]. The chroma planes
 * are only touched when FLAG_CHROMA is set in c->flags.
 */
static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
    MotionEstContext * const c= &s->me;
    const int planes= (c->flags & FLAG_CHROMA) ? 3 : 1;
    int p;

    for(p=0; p<planes; p++){
        const int line= p ? s->uvlinesize : s->linesize;

        c->ref[1+ref_index][p] = c->ref[0+ref_index][p] + line;
        c->src[1][p]           = c->src[0][p]           + line;
    }
}
744

    
745
/**
 * Field-based motion search for the current macroblock (s->mb_x, s->mb_y).
 *
 * The macroblock is handled as two field blocks (block 0 and 1, addressed
 * through c->src[block]), each 8 lines high.  For every block up to two
 * reference fields (field_select 0 and 1) are searched with
 * epzs_motion_search2() followed by c->sub_motion_search(), and the cheaper
 * one is kept.
 *
 * While the search runs, c->ymin/c->ymax are halved and c->stride/c->uvstride
 * are doubled; all four are restored before returning.
 *
 * @param s                   encoder context
 * @param ref_index           index in c->ref[] of the first reference field;
 *                            field_select is added to it
 * @param mv_tables           [block][field_select] per-macroblock MV tables;
 *                            entry xy of every searched table is overwritten
 *                            with the vector found
 * @param field_select_tables [block] per-macroblock tables; read when
 *                            user_field_select is set, always written with
 *                            the chosen field
 * @param mx                  x component of the frame MV of this macroblock,
 *                            used as search candidate and for the "same" test
 * @param my                  y component of the frame MV (my/2 is used as the
 *                            candidate)
 * @param user_field_select   if nonzero, only the field already stored in
 *                            field_select_tables is searched
 * @return INT_MAX if, for both blocks, the chosen field equals the block
 *         index and the field vector matches the frame vector (x == mx,
 *         y even, 2*y == my); otherwise the sum of the best scores, plus
 *         11*c->mb_penalty_factor unless mb_cmp is FF_CMP_RD
 */
static int interlaced_search(MpegEncContext *s, int ref_index,
                             int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
{
    MotionEstContext * const c= &s->me;
    const int size=0;
    const int h=8;
    int block;
    int P[10][2];
    uint8_t * const mv_penalty= c->current_mv_penalty;
    int same=1;
    const int stride= 2*s->linesize;
    int dmin_sum= 0;
    const int mot_stride= s->mb_stride;
    const int xy= s->mb_x + s->mb_y*mot_stride;

    /* switch the context to field geometry: half the vertical range,
     * twice the line stride */
    c->ymin>>=1;
    c->ymax>>=1;
    c->stride<<=1;
    c->uvstride<<=1;
    init_interlaced_ref(s, ref_index);

    for(block=0; block<2; block++){
        int field_select;
        int best_dmin= INT_MAX;
        int best_field= -1;

        for(field_select=0; field_select<2; field_select++){
            int dmin, mx_i, my_i;
            int16_t (*mv_table)[2]= mv_tables[block][field_select];

            if(user_field_select){
                assert(field_select==0 || field_select==1);
                assert(field_select_tables[block][xy]==0 || field_select_tables[block][xy]==1);
                if(field_select_tables[block][xy] != field_select)
                    continue;
            }

            /* Predictors from the neighbouring macroblocks.  The limits are
             * scaled with "* 2" rather than "<< 1" because xmin may be
             * negative and left-shifting a negative value is undefined. */
            P_LEFT[0] = mv_table[xy - 1][0];
            P_LEFT[1] = mv_table[xy - 1][1];
            if(P_LEFT[0]       > c->xmax*2) P_LEFT[0]       = c->xmax*2;

            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];

            if(!s->first_slice_line){
                P_TOP[0]      = mv_table[xy - mot_stride][0];
                P_TOP[1]      = mv_table[xy - mot_stride][1];
                P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
                P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
                if(P_TOP[1]      > c->ymax*2) P_TOP[1]     = c->ymax*2;
                if(P_TOPRIGHT[0] < c->xmin*2) P_TOPRIGHT[0]= c->xmin*2;
                if(P_TOPRIGHT[0] > c->xmax*2) P_TOPRIGHT[0]= c->xmax*2;
                if(P_TOPRIGHT[1] > c->ymax*2) P_TOPRIGHT[1]= c->ymax*2;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            P_MV1[0]= mx; //FIXME not correct if block != field_select
            P_MV1[1]= my / 2;

            dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);

            dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);

            mv_table[xy][0]= mx_i;
            mv_table[xy][1]= my_i;

            if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
                /* the refinement used a different metric than the mode
                 * decision: rebuild the half-pel prediction and rescore it
                 * with mb_cmp */
                int dxy;

                //FIXME chroma ME
                uint8_t *ref= c->ref[field_select+ref_index][0] + (mx_i>>1) + (my_i>>1)*stride;
                dxy = ((my_i & 1) << 1) | (mx_i & 1);

                if(s->no_rounding){
                    s->dsp.put_no_rnd_pixels_tab[size][dxy](c->scratchpad, ref    , stride, h);
                }else{
                    s->dsp.put_pixels_tab       [size][dxy](c->scratchpad, ref    , stride, h);
                }
                dmin= s->dsp.mb_cmp[size](s, c->src[block][0], c->scratchpad, stride, h);
                dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
            }else
                dmin+= c->mb_penalty_factor; //field_select bits

            dmin += field_select != block; //slightly prefer same field

            if(dmin < best_dmin){
                best_dmin= dmin;
                best_field= field_select;
            }
        }
        {
            int16_t (*mv_table)[2]= mv_tables[block][best_field];

            if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
            if(mv_table[xy][1]&1) same=0;
            if(mv_table[xy][1]*2 != my) same=0;
            if(best_field != block) same=0;
        }

        field_select_tables[block][xy]= best_field;
        dmin_sum += best_dmin;
    }

    /* restore frame geometry; ymin may be negative, so multiply instead of
     * left-shifting */
    c->ymin*=2;
    c->ymax*=2;
    c->stride>>=1;
    c->uvstride>>=1;

    if(same)
        return INT_MAX;

    switch(c->avctx->mb_cmp&0xFF){
    /*case FF_CMP_SSE:
        return dmin_sum+ 32*s->qscale*s->qscale;*/
    case FF_CMP_RD:
        return dmin_sum;
    default:
        return dmin_sum+ 11*c->mb_penalty_factor;
    }
}
866

    
867
/**
 * Clamp a motion vector, in place, to the current search range of s->me.
 *
 * @param s          encoder context supplying xmin/xmax/ymin/ymax
 * @param mv         vector to clamp: mv[0] is x, mv[1] is y
 * @param interlaced 0 or 1; the vertical limits are shifted right by this
 *                   amount before being applied
 */
static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced)
{
    const int lo[2] = { s->me.xmin, s->me.ymin >> interlaced };
    const int hi[2] = { s->me.xmax, s->me.ymax >> interlaced };
    int i;

    for (i = 0; i < 2; i++) {
        /* two independent checks, lower limit first */
        if (mv[i] < lo[i])
            mv[i] = lo[i];
        if (mv[i] > hi[i])
            mv[i] = hi[i];
    }
}
876

    
877
/**
 * Score the motion vectors and macroblock type already stored in
 * s->current_picture_ptr for one macroblock, and copy them into the
 * encoder's candidate tables.
 *
 * The four luma block vectors of both lists are first clamped to the search
 * range.  Depending on the stored macroblock type the vectors are copied to
 * the matching p_ / b_ MV and field-select tables, s->mb_type[] is set to the
 * corresponding CANDIDATE_MB_TYPE_*, and the prediction is scored with the
 * s->dsp.sse[] comparison functions.
 *
 * @param s      encoder context
 * @param mb_x   macroblock column
 * @param mb_y   macroblock row
 * @param p_type nonzero when called for a P frame (list 1 is then rejected)
 * @return the score; 0 if the stored type uses neither list (s->mb_type[] is
 *         then CANDIDATE_MB_TYPE_INTRA); INT_MAX/2 if the stored type is not
 *         allowed (backward vector in a P frame, interlaced or 4MV
 *         macroblock while the matching encoder flag is off)
 */
static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int p_type){
    MotionEstContext * const c= &s->me;
    Picture *p= s->current_picture_ptr;
    int mb_xy= mb_x + mb_y*s->mb_stride;
    int xy= 2*mb_x + 2*mb_y*s->b8_stride;
    int mb_type= s->current_picture.mb_type[mb_xy];
    int flags= c->flags;
    int shift= (flags&FLAG_QPEL) + 1;
    int mask= (1<<shift)-1;
    int x, y, i;
    int d=0;
    me_cmp_func cmpf= s->dsp.sse[0];
    me_cmp_func chroma_cmpf= s->dsp.sse[1];

    if(p_type && USES_LIST(mb_type, 1)){
        av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
        return INT_MAX/2;
    }
    assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));

    for(i=0; i<4; i++){
        const int blk_xy= s->block_index[i];
        clip_input_mv(s, p->motion_val[0][blk_xy], !!IS_INTERLACED(mb_type));
        clip_input_mv(s, p->motion_val[1][blk_xy], !!IS_INTERLACED(mb_type));
    }

    if(IS_INTERLACED(mb_type)){
        int xy2= xy  + s->b8_stride;
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;

        /* Reject before touching the strides: doing it afterwards would
         * return with c->stride/c->uvstride still doubled and corrupt every
         * later macroblock. */
        if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
            av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
            return INT_MAX/2;
        }

        /* field geometry for cmp(); undone at the end of this branch */
        c->stride<<=1;
        c->uvstride<<=1;

        if(USES_LIST(mb_type, 0)){
            int field_select0= p->ref_index[0][4*mb_xy  ];
            int field_select1= p->ref_index[0][4*mb_xy+2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 0);

            if(p_type){
                s->p_field_select_table[0][mb_xy]= field_select0;
                s->p_field_select_table[1][mb_xy]= field_select1;
                *(uint32_t*)s->p_field_mv_table[0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->p_field_mv_table[1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER_I;
            }else{
                s->b_field_select_table[0][0][mb_xy]= field_select0;
                s->b_field_select_table[0][1][mb_xy]= field_select1;
                *(uint32_t*)s->b_field_mv_table[0][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[0][xy ];
                *(uint32_t*)s->b_field_mv_table[0][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[0][xy2];
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
            }

            x= p->motion_val[0][xy ][0];
            y= p->motion_val[0][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[0][xy2][0];
            y= p->motion_val[0][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
        }
        if(USES_LIST(mb_type, 1)){
            int field_select0= p->ref_index[1][4*mb_xy  ];
            int field_select1= p->ref_index[1][4*mb_xy+2];
            assert(field_select0==0 ||field_select0==1);
            assert(field_select1==0 ||field_select1==1);
            init_interlaced_ref(s, 2);

            s->b_field_select_table[1][0][mb_xy]= field_select0;
            s->b_field_select_table[1][1][mb_xy]= field_select1;
            *(uint32_t*)s->b_field_mv_table[1][0][field_select0][mb_xy]= *(uint32_t*)p->motion_val[1][xy ];
            *(uint32_t*)s->b_field_mv_table[1][1][field_select1][mb_xy]= *(uint32_t*)p->motion_val[1][xy2];
            if(USES_LIST(mb_type, 0)){
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BIDIR_I;
            }else{
                s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
            }

            /* note: this replaces, rather than adds to, a list 0 score */
            x= p->motion_val[1][xy ][0];
            y= p->motion_val[1][xy ][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
            x= p->motion_val[1][xy2][0];
            y= p->motion_val[1][xy2][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
            //FIXME bidir scores
        }
        c->stride>>=1;
        c->uvstride>>=1;
    }else if(IS_8X8(mb_type)){
        if(!(s->flags & CODEC_FLAG_4MV)){
            av_log(c->avctx, AV_LOG_ERROR, "4MV macroblock selected but 4MV encoding disabled\n");
            return INT_MAX/2;
        }
        cmpf= s->dsp.sse[1];
        chroma_cmpf= s->dsp.sse[1];
        init_mv4_ref(c);
        for(i=0; i<4; i++){
            xy= s->block_index[i];
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
        }
        s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER4V;
    }else{
        if(USES_LIST(mb_type, 0)){
            if(p_type){
                *(uint32_t*)s->p_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTER;
            }else if(USES_LIST(mb_type, 1)){
                *(uint32_t*)s->b_bidir_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                *(uint32_t*)s->b_bidir_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BIDIR;
            }else{
                *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
            }
            x= p->motion_val[0][xy][0];
            y= p->motion_val[0][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
        }else if(USES_LIST(mb_type, 1)){
            *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;

            x= p->motion_val[1][xy][0];
            y= p->motion_val[1][xy][1];
            d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
        }else
            s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
    }
    return d;
}
1012

    
1013
/**
 * Add one macroblock's contribution to the scene change score.
 *
 * @param s    encoder context; s->me.scene_change_score is updated
 * @param vard inter score of the macroblock
 * @param varc intra variance score of the macroblock, including the +500
 *             bias that is removed again here
 */
static inline void add_scene_change_score(MpegEncContext *s, int vard, int varc)
{
    const int lambda  = s->lambda2 >> FF_LAMBDA_SHIFT;
    const int p_score = FFMIN(vard, varc - 500 + lambda * 100);
    const int i_score = varc - 500 + lambda * 20;

    s->me.scene_change_score += ff_sqrt(p_score) - ff_sqrt(i_score);
}

/**
 * Motion estimation and candidate-type selection for one P-frame macroblock.
 *
 * Stores the mean/variance statistics of the macroblock in
 * s->current_picture, runs the configured full-pel search (s->me_method)
 * followed by sub-pel refinement, optionally tries 4MV and interlaced
 * prediction, fills the P motion vector tables and finally writes the set of
 * candidate macroblock types to s->mb_type[].
 *
 * If avctx->me_threshold is set, the vectors already stored in the picture
 * are scored first through check_input_motion(); when they are good enough
 * the function returns early and keeps what check_input_motion() stored.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_p_frame_motion(MpegEncContext * s,
                                int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    uint8_t *pix, *ppix;
    int sum, mx, my, dmin;
    int varc;            ///< the variance of the block (sum of squared (p[y][x]-average))
    int vard;            ///< sum of squared differences with the estimated motion vector
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    int mb_type=0;
    Picture * const pic= &s->current_picture;

    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);
    assert(s->linesize == c->stride);
    assert(s->uvlinesize == c->uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    /* intra / predictive decision */
    pix = c->src[0][0];
    sum = s->dsp.pix_sum(pix, s->linesize);
    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;

    pic->mb_mean[mb_xy] = (sum+128)>>8;
    pic->mb_var [mb_xy] = (varc+128)>>8;
    c->mb_var_sum_temp += (varc+128)>>8;

    if(c->avctx->me_threshold){
        vard= check_input_motion(s, mb_x, mb_y, 1);

        if((vard+128)>>8 < c->avctx->me_threshold){
            /* the stored vectors are good enough: keep them, skip the search */
            pic->mc_mb_var[mb_xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            add_scene_change_score(s, vard, varc);
            return;
        }
        if((vard+128)>>8 < c->avctx->mb_threshold)
            mb_type= s->mb_type[mb_xy];
    }

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        mx-= mb_x*16;
        my-= mb_y*16;
        dmin = 0;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            const int mot_stride = s->b8_stride;
            const int mot_xy = s->block_index[0];
            /* Scale the full-pel limits by multiplication: xmin may be
             * negative and left-shifting a negative value is undefined. */
            const int scale = 1 << shift;

            P_LEFT[0]       = s->current_picture.motion_val[0][mot_xy - 1][0];
            P_LEFT[1]       = s->current_picture.motion_val[0][mot_xy - 1][1];

            if(P_LEFT[0]       > c->xmax*scale) P_LEFT[0]       = c->xmax*scale;

            if(!s->first_slice_line) {
                P_TOP[0]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][0];
                P_TOP[1]      = s->current_picture.motion_val[0][mot_xy - mot_stride    ][1];
                P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
                P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
                if(P_TOP[1]      > c->ymax*scale) P_TOP[1]     = c->ymax*scale;
                if(P_TOPRIGHT[0] < c->xmin*scale) P_TOPRIGHT[0]= c->xmin*scale;
                if(P_TOPRIGHT[1] > c->ymax*scale) P_TOPRIGHT[1]= c->ymax*scale;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

                if(s->out_format == FMT_H263){
                    c->pred_x = P_MEDIAN[0];
                    c->pred_y = P_MEDIAN[1];
                }else { /* mpeg1 at least */
                    c->pred_x= P_LEFT[0];
                    c->pred_y= P_LEFT[1];
                }
            }else{
                c->pred_x= P_LEFT[0];
                c->pred_y= P_LEFT[1];
            }

        }
        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

        break;
    }

    /* At this point (mx,my) are full-pell and the relative displacement */
    ppix = c->ref[0][0] + (my * s->linesize) + mx;

    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);

    pic->mc_mb_var[mb_xy] = (vard+128)>>8;
//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
    c->mc_mb_var_sum_temp += (vard+128)>>8;

    if(mb_type){
        /* the type was taken from the stored motion: only refine it */
        add_scene_change_score(s, vard, varc);

        if(mb_type == CANDIDATE_MB_TYPE_INTER){
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            set_p_mv_tables(s, mx, my, 1);
        }else{
            /* full-pel to sub-pel units; mx/my may be negative */
            mx *= 1 << shift;
            my *= 1 << shift;
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER4V){
            h263_mv4_search(s, mx, my, shift);

            set_p_mv_tables(s, mx, my, 0);
        }
        if(mb_type == CANDIDATE_MB_TYPE_INTER_I){
            interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
        }
    }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        /* a later stage decides: collect every plausible candidate type */
        add_scene_change_score(s, vard, varc);

        if (vard*2 + 200*256 > varc)
            mb_type|= CANDIDATE_MB_TYPE_INTRA;
        if (varc*2 + 200*256 > vard || s->qscale > 24){
//        if (varc*2 + 200*256 + 50*(s->lambda2>>FF_LAMBDA_SHIFT) > vard){
            mb_type|= CANDIDATE_MB_TYPE_INTER;
            c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
            if(s->flags&CODEC_FLAG_MV0)
                if(mx || my)
                    mb_type |= CANDIDATE_MB_TYPE_SKIPPED; //FIXME check difference
        }else{
            /* full-pel to sub-pel units; mx/my may be negative */
            mx *= 1 << shift;
            my *= 1 << shift;
        }
        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                mb_type|=CANDIDATE_MB_TYPE_INTER4V;

            set_p_mv_tables(s, mx, my, 0);
        }else
            set_p_mv_tables(s, mx, my, 1);
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            if(interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0) < INT_MAX)
                mb_type |= CANDIDATE_MB_TYPE_INTER_I;
        }
    }else{
        /* simple decision: pick the single cheapest type right here */
        int intra_score, i;
        mb_type= CANDIDATE_MB_TYPE_INTER;

        dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
        if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
            dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

        if((s->flags&CODEC_FLAG_4MV)
           && !c->skip && varc>50<<8 && vard>10<<8){
            int dmin4= h263_mv4_search(s, mx, my, shift);
            if(dmin4 < dmin){
                mb_type= CANDIDATE_MB_TYPE_INTER4V;
                dmin=dmin4;
            }
        }
        if((s->flags&CODEC_FLAG_INTERLACED_ME)
           && !c->skip){ //FIXME varc/d checks
            int dmin_i= interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 0);
            if(dmin_i < dmin){
                mb_type = CANDIDATE_MB_TYPE_INTER_I;
                dmin= dmin_i;
            }
        }

//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
        set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);

        /* get intra luma score */
        if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
            intra_score= varc - 500;
        }else{
            /* compare the source against a flat block filled with its mean */
            int mean= (sum+128)>>8;
            mean*= 0x01010101;

            for(i=0; i<16; i++){
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+ 8]) = mean;
                *(uint32_t*)(&c->scratchpad[i*s->linesize+12]) = mean;
            }

            intra_score= s->dsp.mb_cmp[0](s, c->scratchpad, pix, s->linesize, 16);
        }
#if 0 //FIXME
        /* get chroma score */
        if(c->avctx->mb_cmp&FF_CMP_CHROMA){
            for(i=1; i<3; i++){
                uint8_t *dest_c;
                int mean;

                if(s->out_format == FMT_H263){
                    mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
                }else{
                    mean= (s->last_dc[i] + 4)>>3;
                }
                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;

                mean*= 0x01010101;
                for(i=0; i<8; i++){
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 0]) = mean;
                    *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 4]) = mean;
                }

                intra_score+= s->dsp.mb_cmp[1](s, c->scratchpad, dest_c, s->uvlinesize);
            }
        }
#endif
        intra_score += c->mb_penalty_factor*16;

        if(intra_score < dmin){
            mb_type= CANDIDATE_MB_TYPE_INTRA;
            s->current_picture.mb_type[mb_xy]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
        }else
            s->current_picture.mb_type[mb_xy]= 0;

        add_scene_change_score(s, vard, varc);
    }

    s->mb_type[mb_xy]= mb_type;
}
1262

    
1263
/**
 * Pre-pass motion estimation for one P-frame macroblock.
 *
 * Runs a full-pel EPZS search and stores the result, scaled to sub-pel
 * units, in s->p_mv_table[].  Unlike the main pass, the predictors are read
 * from the macroblocks to the right (xy + 1) and below (xy + mb_stride), and
 * the clamps are mirrored accordingly; presumably the pre-pass walks the
 * picture in reverse order -- confirm against the caller.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return the score returned by ff_epzs_motion_search()
 */
int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
                                    int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    /* Scale by multiplication: xmin, ymin, mx and my may be negative and
     * left-shifting a negative value is undefined. */
    const int scale= 1<<shift;
    const int xy= mb_x + mb_y*s->mb_stride;
    init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);

    assert(s->quarter_sample==0 || s->quarter_sample==1);

    c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
    c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;

    get_limits(s, 16*mb_x, 16*mb_y);
    c->skip=0;

    P_LEFT[0]       = s->p_mv_table[xy + 1][0];
    P_LEFT[1]       = s->p_mv_table[xy + 1][1];

    if(P_LEFT[0]       < c->xmin*scale) P_LEFT[0]       = c->xmin*scale;

    /* special case for first line */
    if (s->first_slice_line) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
        P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
    } else {
        P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
        P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
        P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
        P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
        if(P_TOP[1]      < c->ymin*scale) P_TOP[1]     = c->ymin*scale;
        if(P_TOPRIGHT[0] > c->xmax*scale) P_TOPRIGHT[0]= c->xmax*scale;
        if(P_TOPRIGHT[1] < c->ymin*scale) P_TOPRIGHT[1]= c->ymin*scale;

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);

    /* the search result is full-pel; the table holds sub-pel units */
    s->p_mv_table[xy][0] = mx*scale;
    s->p_mv_table[xy][1] = my*scale;

    return dmin;
}
1315

    
1316
/**
 * Single-direction motion estimation for one B-frame macroblock.
 *
 * Runs the configured full-pel search (s->me_method), refines it with
 * c->sub_motion_search(), rescores it with ff_get_mb_score() when the
 * sub-pel and macroblock metrics differ and the block is not skipped, and
 * stores the vector in mv_table.
 *
 * @param s         encoder context
 * @param mb_x      macroblock column
 * @param mb_y      macroblock row
 * @param mv_table  per-macroblock MV table for this direction; neighbours
 *                  are read as predictors and entry (mb_x, mb_y) is written.
 *                  When it is s->b_forw_mv_table the forward temporal scale
 *                  is used, otherwise the backward one.
 * @param ref_index index of the reference in c->ref[]
 * @param f_code    selects the MV penalty table (c->mv_penalty[f_code])
 * @return the score of the vector found
 */
static int ff_estimate_motion_b(MpegEncContext * s,
                       int mb_x, int mb_y, int16_t (*mv_table)[2], int ref_index, int f_code)
{
    MotionEstContext * const c= &s->me;
    int mx, my, dmin;
    int P[10][2];
    const int shift= 1+s->quarter_sample;
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
    int mv_scale;

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= mv_penalty;

    get_limits(s, 16*mb_x, 16*mb_y);

    switch(s->me_method) {
    case ME_ZERO:
    default:
        no_motion_search(s, &mx, &my);
        dmin = 0;
        mx-= mb_x*16;
        my-= mb_y*16;
        break;
    case ME_X1:
    case ME_EPZS:
       {
            /* Scale by multiplication: xmin may be negative and
             * left-shifting a negative value is undefined. */
            const int scale = 1 << shift;

            P_LEFT[0]        = mv_table[mot_xy - 1][0];
            P_LEFT[1]        = mv_table[mot_xy - 1][1];

            if(P_LEFT[0]       > c->xmax*scale) P_LEFT[0]       = c->xmax*scale;

            /* special case for first line */
            if (!s->first_slice_line) {
                P_TOP[0] = mv_table[mot_xy - mot_stride             ][0];
                P_TOP[1] = mv_table[mot_xy - mot_stride             ][1];
                P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1         ][0];
                P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1         ][1];
                if(P_TOP[1] > c->ymax*scale) P_TOP[1]= c->ymax*scale;
                if(P_TOPRIGHT[0] < c->xmin*scale) P_TOPRIGHT[0]= c->xmin*scale;
                if(P_TOPRIGHT[1] > c->ymax*scale) P_TOPRIGHT[1]= c->ymax*scale;

                P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
            }
            c->pred_x= P_LEFT[0];
            c->pred_y= P_LEFT[1];
        }

        /* 16.16 fixed-point factor used to scale the s->p_mv_table
         * candidates to this B frame's temporal distance */
        if(mv_table == s->b_forw_mv_table){
            mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
        }else{
            /* pb_time - pp_time can be negative, so multiply rather than
             * left-shift */
            mv_scale= ((s->pb_time - s->pp_time)*(1<<16)) / (s->pp_time<<shift);
        }

        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);

        break;
    }

    dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);

    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);

//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
//    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;

    return dmin;
}
1391

    
1392
/**
 * Score one pair of forward/backward motion vectors for bidirectional
 * prediction.
 *
 * The forward prediction (from c->ref[0]) is written to c->scratchpad and
 * the backward prediction (from c->ref[2]) is averaged into it; the result
 * is compared with the source block using s->dsp.mb_cmp[size].  Vectors are
 * in quarter-pel units when s->quarter_sample is set, half-pel otherwise.
 * Only luma is evaluated.
 *
 * @param s         encoder context
 * @param motion_fx forward vector, x
 * @param motion_fy forward vector, y
 * @param motion_bx backward vector, x
 * @param motion_by backward vector, y
 * @param pred_fx   forward vector predictor, x (for the MV cost)
 * @param pred_fy   forward vector predictor, y
 * @param pred_bx   backward vector predictor, x
 * @param pred_by   backward vector predictor, y
 * @param size      index into the pixel-function and mb_cmp tables
 * @param h         block height in lines (not passed to the qpel functions)
 * @return mb_cmp score of the averaged prediction plus the penalty of both
 *         vector differences times c->mb_penalty_factor
 */
static inline int check_bidir_mv(MpegEncContext * s,
                   int motion_fx, int motion_fy,
                   int motion_bx, int motion_by,
                   int pred_fx, int pred_fy,
                   int pred_bx, int pred_by,
                   int size, int h)
{
    //FIXME optimize?
    //FIXME better f_code prediction (max mv & distance)
    //FIXME pointers
    MotionEstContext * const c= &s->me;
    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // penalty table selected by f_code (forward)
    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // penalty table selected by b_code (backward)
    int stride= c->stride;
    uint8_t *dest_y = c->scratchpad;
    uint8_t *ptr;
    int dxy;
    int src_x, src_y;
    int fbmin;
    uint8_t **src_data= c->src[0];
    uint8_t **ref_data= c->ref[0];
    uint8_t **ref2_data= c->ref[2];

    if(s->quarter_sample){
        dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
        src_x = motion_fx >> 2;
        src_y = motion_fy >> 2;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        /* index by size like every other prediction call in this function
         * (this one used a hard-coded 0) */
        s->dsp.put_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);

        dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
        src_x = motion_bx >> 2;
        src_y = motion_by >> 2;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
    }else{
        dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
        src_x = motion_fx >> 1;
        src_y = motion_fy >> 1;

        ptr = ref_data[0] + (src_y * stride) + src_x;
        s->dsp.put_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);

        dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
        src_x = motion_bx >> 1;
        src_y = motion_by >> 1;

        ptr = ref2_data[0] + (src_y * stride) + src_x;
        s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
    }

    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
           + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic

    //FIXME CHROMA !!! (chroma is ignored even when mb_cmp has FF_CMP_CHROMA set)

    return fbmin;
}
1455

    
1456
/* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1457
static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
1458
{
1459
    MotionEstContext * const c= &s->me;
1460
    const int mot_stride = s->mb_stride;
1461
    const int xy = mb_y *mot_stride + mb_x;
1462
    int fbmin;
1463
    int pred_fx= s->b_bidir_forw_mv_table[xy-1][0];
1464
    int pred_fy= s->b_bidir_forw_mv_table[xy-1][1];
1465
    int pred_bx= s->b_bidir_back_mv_table[xy-1][0];
1466
    int pred_by= s->b_bidir_back_mv_table[xy-1][1];
1467
    int motion_fx= s->b_bidir_forw_mv_table[xy][0]= s->b_forw_mv_table[xy][0];
1468
    int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
1469
    int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
1470
    int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1471
    const int flags= c->sub_flags;
1472
    const int qpel= flags&FLAG_QPEL;
1473
    const int shift= 1+qpel;
1474
    const int xmin= c->xmin<<shift;
1475
    const int ymin= c->ymin<<shift;
1476
    const int xmax= c->xmax<<shift;
1477
    const int ymax= c->ymax<<shift;
1478
#define HASH(fx,fy,bx,by) ((fx)+17*(fy)+63*(bx)+117*(by))
1479
#define HASH8(fx,fy,bx,by) ((uint8_t)HASH(fx,fy,bx,by))
1480
    int hashidx= HASH(motion_fx,motion_fy, motion_bx, motion_by);
1481
    uint8_t map[256];
1482

    
1483
    memset(map,0,sizeof(map));
1484
    map[hashidx&255] = 1;
1485

    
1486
    fbmin= check_bidir_mv(s, motion_fx, motion_fy,
1487
                          motion_bx, motion_by,
1488
                          pred_fx, pred_fy,
1489
                          pred_bx, pred_by,
1490
                          0, 16);
1491

    
1492
    if(s->avctx->bidir_refine){
1493
        int end;
1494
        static const uint8_t limittab[5]={0,8,32,64,80};
1495
        const int limit= limittab[s->avctx->bidir_refine];
1496
        static const int8_t vect[][4]={
1497
{ 0, 0, 0, 1}, { 0, 0, 0,-1}, { 0, 0, 1, 0}, { 0, 0,-1, 0}, { 0, 1, 0, 0}, { 0,-1, 0, 0}, { 1, 0, 0, 0}, {-1, 0, 0, 0},
1498

    
1499
{ 0, 0, 1, 1}, { 0, 0,-1,-1}, { 0, 1, 1, 0}, { 0,-1,-1, 0}, { 1, 1, 0, 0}, {-1,-1, 0, 0}, { 1, 0, 0, 1}, {-1, 0, 0,-1},
1500
{ 0, 1, 0, 1}, { 0,-1, 0,-1}, { 1, 0, 1, 0}, {-1, 0,-1, 0},
1501
{ 0, 0,-1, 1}, { 0, 0, 1,-1}, { 0,-1, 1, 0}, { 0, 1,-1, 0}, {-1, 1, 0, 0}, { 1,-1, 0, 0}, { 1, 0, 0,-1}, {-1, 0, 0, 1},
1502
{ 0,-1, 0, 1}, { 0, 1, 0,-1}, {-1, 0, 1, 0}, { 1, 0,-1, 0},
1503

    
1504
{ 0, 1, 1, 1}, { 0,-1,-1,-1}, { 1, 1, 1, 0}, {-1,-1,-1, 0}, { 1, 1, 0, 1}, {-1,-1, 0,-1}, { 1, 0, 1, 1}, {-1, 0,-1,-1},
1505
{ 0,-1, 1, 1}, { 0, 1,-1,-1}, {-1, 1, 1, 0}, { 1,-1,-1, 0}, { 1, 1, 0,-1}, {-1,-1, 0, 1}, { 1, 0,-1, 1}, {-1, 0, 1,-1},
1506
{ 0, 1,-1, 1}, { 0,-1, 1,-1}, { 1,-1, 1, 0}, {-1, 1,-1, 0}, {-1, 1, 0, 1}, { 1,-1, 0,-1}, { 1, 0, 1,-1}, {-1, 0,-1, 1},
1507
{ 0, 1, 1,-1}, { 0,-1,-1, 1}, { 1, 1,-1, 0}, {-1,-1, 1, 0}, { 1,-1, 0, 1}, {-1, 1, 0,-1}, {-1, 0, 1, 1}, { 1, 0,-1,-1},
1508

    
1509
{ 1, 1, 1, 1}, {-1,-1,-1,-1},
1510
{ 1, 1, 1,-1}, {-1,-1,-1, 1}, { 1, 1,-1, 1}, {-1,-1, 1,-1}, { 1,-1, 1, 1}, {-1, 1,-1,-1}, {-1, 1, 1, 1}, { 1,-1,-1,-1},
1511
{ 1, 1,-1,-1}, {-1,-1, 1, 1}, { 1,-1,-1, 1}, {-1, 1, 1,-1}, { 1,-1, 1,-1}, {-1, 1,-1, 1},
1512
        };
1513
        static const uint8_t hash[]={
1514
HASH8( 0, 0, 0, 1), HASH8( 0, 0, 0,-1), HASH8( 0, 0, 1, 0), HASH8( 0, 0,-1, 0), HASH8( 0, 1, 0, 0), HASH8( 0,-1, 0, 0), HASH8( 1, 0, 0, 0), HASH8(-1, 0, 0, 0),
1515

    
1516
HASH8( 0, 0, 1, 1), HASH8( 0, 0,-1,-1), HASH8( 0, 1, 1, 0), HASH8( 0,-1,-1, 0), HASH8( 1, 1, 0, 0), HASH8(-1,-1, 0, 0), HASH8( 1, 0, 0, 1), HASH8(-1, 0, 0,-1),
1517
HASH8( 0, 1, 0, 1), HASH8( 0,-1, 0,-1), HASH8( 1, 0, 1, 0), HASH8(-1, 0,-1, 0),
1518
HASH8( 0, 0,-1, 1), HASH8( 0, 0, 1,-1), HASH8( 0,-1, 1, 0), HASH8( 0, 1,-1, 0), HASH8(-1, 1, 0, 0), HASH8( 1,-1, 0, 0), HASH8( 1, 0, 0,-1), HASH8(-1, 0, 0, 1),
1519
HASH8( 0,-1, 0, 1), HASH8( 0, 1, 0,-1), HASH8(-1, 0, 1, 0), HASH8( 1, 0,-1, 0),
1520

    
1521
HASH8( 0, 1, 1, 1), HASH8( 0,-1,-1,-1), HASH8( 1, 1, 1, 0), HASH8(-1,-1,-1, 0), HASH8( 1, 1, 0, 1), HASH8(-1,-1, 0,-1), HASH8( 1, 0, 1, 1), HASH8(-1, 0,-1,-1),
1522
HASH8( 0,-1, 1, 1), HASH8( 0, 1,-1,-1), HASH8(-1, 1, 1, 0), HASH8( 1,-1,-1, 0), HASH8( 1, 1, 0,-1), HASH8(-1,-1, 0, 1), HASH8( 1, 0,-1, 1), HASH8(-1, 0, 1,-1),
1523
HASH8( 0, 1,-1, 1), HASH8( 0,-1, 1,-1), HASH8( 1,-1, 1, 0), HASH8(-1, 1,-1, 0), HASH8(-1, 1, 0, 1), HASH8( 1,-1, 0,-1), HASH8( 1, 0, 1,-1), HASH8(-1, 0,-1, 1),
1524
HASH8( 0, 1, 1,-1), HASH8( 0,-1,-1, 1), HASH8( 1, 1,-1, 0), HASH8(-1,-1, 1, 0), HASH8( 1,-1, 0, 1), HASH8(-1, 1, 0,-1), HASH8(-1, 0, 1, 1), HASH8( 1, 0,-1,-1),
1525

    
1526
HASH8( 1, 1, 1, 1), HASH8(-1,-1,-1,-1),
1527
HASH8( 1, 1, 1,-1), HASH8(-1,-1,-1, 1), HASH8( 1, 1,-1, 1), HASH8(-1,-1, 1,-1), HASH8( 1,-1, 1, 1), HASH8(-1, 1,-1,-1), HASH8(-1, 1, 1, 1), HASH8( 1,-1,-1,-1),
1528
HASH8( 1, 1,-1,-1), HASH8(-1,-1, 1, 1), HASH8( 1,-1,-1, 1), HASH8(-1, 1, 1,-1), HASH8( 1,-1, 1,-1), HASH8(-1, 1,-1, 1),
1529
};
1530

    
1531
#define CHECK_BIDIR(fx,fy,bx,by)\
1532
    if( !map[(hashidx+HASH(fx,fy,bx,by))&255]\
1533
       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
1534
       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
1535
        int score;\
1536
        map[(hashidx+HASH(fx,fy,bx,by))&255] = 1;\
1537
        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
1538
        if(score < fbmin){\
1539
            hashidx += HASH(fx,fy,bx,by);\
1540
            fbmin= score;\
1541
            motion_fx+=fx;\
1542
            motion_fy+=fy;\
1543
            motion_bx+=bx;\
1544
            motion_by+=by;\
1545
            end=0;\
1546
        }\
1547
    }
1548
#define CHECK_BIDIR2(a,b,c,d)\
1549
CHECK_BIDIR(a,b,c,d)\
1550
CHECK_BIDIR(-(a),-(b),-(c),-(d))
1551

    
1552
        do{
1553
            int i;
1554
            int borderdist=0;
1555
            end=1;
1556

    
1557
            CHECK_BIDIR2(0,0,0,1)
1558
            CHECK_BIDIR2(0,0,1,0)
1559
            CHECK_BIDIR2(0,1,0,0)
1560
            CHECK_BIDIR2(1,0,0,0)
1561

    
1562
            for(i=8; i<limit; i++){
1563
                int fx= motion_fx+vect[i][0];
1564
                int fy= motion_fy+vect[i][1];
1565
                int bx= motion_bx+vect[i][2];
1566
                int by= motion_by+vect[i][3];
1567
                if(borderdist<=0){
1568
                    int a= (xmax - FFMAX(fx,bx))|(FFMIN(fx,bx) - xmin);
1569
                    int b= (ymax - FFMAX(fy,by))|(FFMIN(fy,by) - ymin);
1570
                    if((a|b) < 0)
1571
                        map[(hashidx+hash[i])&255] = 1;
1572
                }
1573
                if(!map[(hashidx+hash[i])&255]){
1574
                    int score;
1575
                    map[(hashidx+hash[i])&255] = 1;
1576
                    score= check_bidir_mv(s, fx, fy, bx, by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);
1577
                    if(score < fbmin){
1578
                        hashidx += hash[i];
1579
                        fbmin= score;
1580
                        motion_fx=fx;
1581
                        motion_fy=fy;
1582
                        motion_bx=bx;
1583
                        motion_by=by;
1584
                        end=0;
1585
                        borderdist--;
1586
                        if(borderdist<=0){
1587
                            int a= FFMIN(xmax - FFMAX(fx,bx), FFMIN(fx,bx) - xmin);
1588
                            int b= FFMIN(ymax - FFMAX(fy,by), FFMIN(fy,by) - ymin);
1589
                            borderdist= FFMIN(a,b);
1590
                        }
1591
                    }
1592
                }
1593
            }
1594
        }while(!end);
1595
    }
1596

    
1597
    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
1598
    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
1599
    s->b_bidir_back_mv_table[xy][0]= motion_bx;
1600
    s->b_bidir_back_mv_table[xy][1]= motion_by;
1601

    
1602
    return fbmin;
1603
}
1604

    
1605
/**
 * Search the best delta vector for direct mode of one B-frame macroblock.
 *
 * The co-located vectors of the next picture are scaled by pb_time/pp_time
 * to form the direct basis vectors. From them the range of delta vectors
 * that keeps every derived vector inside the picture (plus a 16 pixel
 * border) is computed. If no delta is possible the direct vector is set to
 * zero and a large sentinel score is returned; otherwise an EPZS search
 * followed by sub-pel refinement is run with FLAG_DIRECT set.
 *
 * The result is stored in s->b_direct_mv_table.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 * @return score of the best delta vector, or 256*256*256*64 if direct mode
 *         cannot be used for this macroblock
 */
static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    int P[10][2];
    const int mot_stride = s->mb_stride;
    const int mot_xy = mb_y*mot_stride + mb_x;
    const int shift= 1+s->quarter_sample;
    int dmin, i;
    const int time_pp= s->pp_time;
    const int time_pb= s->pb_time;
    int mx, my, xmin, xmax, ymin, ymax;
    int clip_xmin, clip_xmax, clip_ymin, clip_ymax;
    int16_t (*mv_table)[2]= s->b_direct_mv_table;

    c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
    /* initial delta range in full pixels */
    ymin= xmin=(-32)>>shift;
    ymax= xmax=   31>>shift;

    if(IS_8X8(s->next_picture.mb_type[mot_xy])){
        s->mv_type= MV_TYPE_8X8;
    }else{
        s->mv_type= MV_TYPE_16X16;
    }

    for(i=0; i<4; i++){
        int index= s->block_index[i];
        int min, max;

        c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
        c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
        /* scaled co-located vector plus the 8 pixel offset of sub-block i */
        c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
        c->direct_basis_mv[i][1]= c->co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));

        /* horizontal: extreme of the basis vector and of
         * (basis - co-located), converted to an absolute pixel position */
        max= FFMAX(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        min= FFMIN(c->direct_basis_mv[i][0], c->direct_basis_mv[i][0] - c->co_located_mv[i][0])>>shift;
        max+= 16*mb_x + 1; // +-1 is for the simpler rounding
        min+= 16*mb_x - 1;
        xmax= FFMIN(xmax, s->width - max);
        xmin= FFMAX(xmin, - 16     - min);

        /* vertical: same computation */
        max= FFMAX(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        min= FFMIN(c->direct_basis_mv[i][1], c->direct_basis_mv[i][1] - c->co_located_mv[i][1])>>shift;
        max+= 16*mb_y + 1; // +-1 is for the simpler rounding
        min+= 16*mb_y - 1;
        ymax= FFMIN(ymax, s->height - max);
        ymin= FFMAX(ymin, - 16      - min);

        /* a 16x16 co-located block has only one vector */
        if(s->mv_type == MV_TYPE_16X16) break;
    }

    assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);

    /* the zero delta is not inside the range: direct mode is unusable */
    if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
        s->b_direct_mv_table[mot_xy][0]= 0;
        s->b_direct_mv_table[mot_xy][1]= 0;

        return 256*256*256*64;
    }

    c->xmin= xmin;
    c->ymin= ymin;
    c->xmax= xmax;
    c->ymax= ymax;
    c->flags     |= FLAG_DIRECT;
    c->sub_flags |= FLAG_DIRECT;
    c->pred_x=0;
    c->pred_y=0;

    /* Limits in sub-pel units. Multiplication is used instead of "<<"
     * because xmin/ymin are <= 0 here and left-shifting a negative value
     * is undefined. */
    clip_xmin= xmin * (1 << shift);
    clip_xmax= xmax * (1 << shift);
    clip_ymin= ymin * (1 << shift);
    clip_ymax= ymax * (1 << shift);

    P_LEFT[0]        = av_clip(mv_table[mot_xy - 1][0], clip_xmin, clip_xmax);
    P_LEFT[1]        = av_clip(mv_table[mot_xy - 1][1], clip_ymin, clip_ymax);

    /* special case for first line */
    if (!s->first_slice_line) { //FIXME maybe allow this over thread boundary as it is clipped
        P_TOP[0]      = av_clip(mv_table[mot_xy - mot_stride             ][0], clip_xmin, clip_xmax);
        P_TOP[1]      = av_clip(mv_table[mot_xy - mot_stride             ][1], clip_ymin, clip_ymax);
        P_TOPRIGHT[0] = av_clip(mv_table[mot_xy - mot_stride + 1         ][0], clip_xmin, clip_xmax);
        P_TOPRIGHT[1] = av_clip(mv_table[mot_xy - mot_stride + 1         ][1], clip_ymin, clip_ymax);

        P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
        P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
    }

    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
    if(c->sub_flags&FLAG_QPEL)
        dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
    else
        dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);

    /* rescore with the macroblock comparison function if it differs */
    if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
        dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);

    get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed

    mv_table[mot_xy][0]= mx;
    mv_table[mot_xy][1]= my;
    c->flags     &= ~FLAG_DIRECT;
    c->sub_flags &= ~FLAG_DIRECT;

    return dmin;
}
1706

    
1707
/**
 * Estimate motion for one macroblock of a B-frame and choose its candidate
 * macroblock type(s).
 *
 * Direct (MPEG-4 only), forward, backward, bidirectional and, if
 * CODEC_FLAG_INTERLACED_ME is set, field based forward/backward searches are
 * run. The vectors are left in the respective s->b_*_mv_table, the candidate
 * type mask is stored in s->mb_type[] and a variance-like value derived from
 * the best score is stored in s->current_picture.mc_mb_var[] and added to
 * c->mc_mb_var_sum_temp.
 *
 * @param s    encoder context
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
void ff_estimate_b_frame_motion(MpegEncContext * s,
                             int mb_x, int mb_y)
{
    MotionEstContext * const c= &s->me;
    const int penalty_factor= c->mb_penalty_factor;
    int fmin, bmin, dmin, fbmin, bimin, fimin;
    int type=0;
    const int xy = mb_y*s->mb_stride + mb_x;
    init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);

    get_limits(s, 16*mb_x, 16*mb_y);

    c->skip=0;

    /* co-located macroblock was skipped: only direct mode is considered */
    if(s->codec_id == CODEC_ID_MPEG4 && s->next_picture.mbskip_table[xy]){
        int score= direct_search(s, mb_x, mb_y); //FIXME just check 0,0

        /* Square in unsigned arithmetic: direct_search() can return a 2^30
         * sentinel whose signed square would overflow (undefined). */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
        s->mb_type[xy]= CANDIDATE_MB_TYPE_DIRECT0;

        return;
    }

    if(c->avctx->me_threshold){
        int vard= check_input_motion(s, mb_x, mb_y, 0);

        /* the supplied motion is good enough: keep it untouched;
         * only the motion compensated variance is recorded */
        if((vard+128)>>8 < c->avctx->me_threshold){
            s->current_picture.mc_mb_var[xy] = (vard+128)>>8;
            c->mc_mb_var_sum_temp += (vard+128)>>8;
            return;
        }
        /* keep the supplied macroblock type, but redo the search for it */
        if((vard+128)>>8 < c->avctx->mb_threshold){
            type= s->mb_type[xy];
            if(type == CANDIDATE_MB_TYPE_DIRECT){
                direct_search(s, mb_x, mb_y);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD || type == CANDIDATE_MB_TYPE_BIDIR){
                c->skip=0;
                ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code);
            }
            if(type == CANDIDATE_MB_TYPE_FORWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
                interlaced_search(s, 0,
                                        s->b_field_mv_table[0], s->b_field_select_table[0],
                                        s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 1);
            }
            if(type == CANDIDATE_MB_TYPE_BACKWARD_I || type == CANDIDATE_MB_TYPE_BIDIR_I){
                c->skip=0;
                c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
                interlaced_search(s, 2,
                                        s->b_field_mv_table[1], s->b_field_select_table[1],
                                        s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 1);
            }
            return;
        }
    }

    if (s->codec_id == CODEC_ID_MPEG4)
        dmin= direct_search(s, mb_x, mb_y);
    else
        dmin= INT_MAX;
//FIXME penalty stuff for non mpeg4
    c->skip=0;
    fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;

    c->skip=0;
    bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;

    c->skip=0;
    fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;

    if(s->flags & CODEC_FLAG_INTERLACED_ME){
//FIXME mb type penalty
        c->skip=0;
        c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV;
        fimin= interlaced_search(s, 0,
                                 s->b_field_mv_table[0], s->b_field_select_table[0],
                                 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1], 0);
        c->current_mv_penalty= c->mv_penalty[s->b_code] + MAX_MV;
        bimin= interlaced_search(s, 2,
                                 s->b_field_mv_table[1], s->b_field_select_table[1],
                                 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1], 0);
    }else
        fimin= bimin= INT_MAX;

    {
        /* pick the mode with the lowest score; direct wins ties against
         * forward, every later mode must be strictly better */
        int score= fmin;
        type = CANDIDATE_MB_TYPE_FORWARD;

        if (dmin <= score){
            score = dmin;
            type = CANDIDATE_MB_TYPE_DIRECT;
        }
        if(bmin<score){
            score=bmin;
            type= CANDIDATE_MB_TYPE_BACKWARD;
        }
        if(fbmin<score){
            score=fbmin;
            type= CANDIDATE_MB_TYPE_BIDIR;
        }
        if(fimin<score){
            score=fimin;
            type= CANDIDATE_MB_TYPE_FORWARD_I;
        }
        if(bimin<score){
            score=bimin;
            type= CANDIDATE_MB_TYPE_BACKWARD_I;
        }

        /* unsigned multiply avoids signed overflow for large scores */
        score= ((unsigned)score*(unsigned)score + 128*256)>>16;
        c->mc_mb_var_sum_temp += score;
        s->current_picture.mc_mb_var[xy] = score; //FIXME use SSE
    }

    /* with a non-simple macroblock decision all searched modes stay candidates */
    if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
        type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
        if(fimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_FORWARD_I;
        if(bimin < INT_MAX)
            type |= CANDIDATE_MB_TYPE_BACKWARD_I;
        if(fimin < INT_MAX && bimin < INT_MAX){
            type |= CANDIDATE_MB_TYPE_BIDIR_I;
        }
         //FIXME something smarter
        if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //do not try direct mode if it is invalid for this MB
        /* Also offer direct mode with a zero delta when the found delta is
         * non-zero. The components are tested individually instead of
         * reading the int16_t pair through a uint32_t pointer. */
        if(   s->codec_id == CODEC_ID_MPEG4
           && (type & CANDIDATE_MB_TYPE_DIRECT)
           && (s->flags & CODEC_FLAG_MV0)
           && (s->b_direct_mv_table[xy][0] || s->b_direct_mv_table[xy][1]))
            type |= CANDIDATE_MB_TYPE_DIRECT0;
    }

    s->mb_type[xy]= type;
}
1864

    
1865
/* find best f_code for ME which do unlimited searches */
1866
int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
1867
{
1868
    if(s->me_method>=ME_EPZS){
1869
        int score[8];
1870
        int i, y, range= s->avctx->me_range ? s->avctx->me_range : (INT_MAX/2);
1871
        uint8_t * fcode_tab= s->fcode_tab;
1872
        int best_fcode=-1;
1873
        int best_score=-10000000;
1874

    
1875
        if(s->msmpeg4_version)
1876
            range= FFMIN(range, 16);
1877
        else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
1878
            range= FFMIN(range, 256);
1879

    
1880
        for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
1881

    
1882
        for(y=0; y<s->mb_height; y++){
1883
            int x;
1884
            int xy= y*s->mb_stride;
1885
            for(x=0; x<s->mb_width; x++){
1886
                if(s->mb_type[xy] & type){
1887
                    int mx= mv_table[xy][0];
1888
                    int my= mv_table[xy][1];
1889
                    int fcode= FFMAX(fcode_tab[mx + MAX_MV],
1890
                                     fcode_tab[my + MAX_MV]);
1891
                    int j;
1892

    
1893
                        if(mx >= range || mx < -range ||
1894
                           my >= range || my < -range)
1895
                            continue;
1896

    
1897
                    for(j=0; j<fcode && j<8; j++){
1898
                        if(s->pict_type==FF_B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
1899
                            score[j]-= 170;
1900
                    }
1901
                }
1902
                xy++;
1903
            }
1904
        }
1905

    
1906
        for(i=1; i<8; i++){
1907
            if(score[i] > best_score){
1908
                best_score= score[i];
1909
                best_fcode= i;
1910
            }
1911
//            printf("%d %d\n", i, score[i]);
1912
        }
1913

    
1914
//    printf("fcode: %d type: %d\n", i, s->pict_type);
1915
        return best_fcode;
1916
/*        for(i=0; i<=MAX_FCODE; i++){
1917
            printf("%d ", mv_num[i]);
1918
        }
1919
        printf("\n");*/
1920
    }else{
1921
        return 1;
1922
    }
1923
}
1924

    
1925
/**
 * Turn 4MV macroblocks of a P-frame into intra candidates if any of their
 * four 8x8 vectors does not fit into the range given by s->f_code.
 *
 * The range is (8 or 16) << f_code, additionally limited by
 * avctx->me_range if that is set. Nothing is done unless CODEC_FLAG_4MV is
 * set.
 *
 * @param s encoder context; the picture type must be FF_P_TYPE
 */
void ff_fix_long_p_mvs(MpegEncContext * s)
{
    MotionEstContext * const c= &s->me;
    const int f_code= s->f_code;
    const int wrap= s->b8_stride;
    int mb_row, range;

    assert(s->pict_type==FF_P_TYPE);

    if(s->out_format == FMT_MPEG1 || s->msmpeg4_version)
        range=  8 << f_code;
    else
        range= 16 << f_code;

    assert(range <= 16 || !s->msmpeg4_version);
    assert(range <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));

    if(c->avctx->me_range && range > c->avctx->me_range)
        range= c->avctx->me_range;

    if(!(s->flags&CODEC_FLAG_4MV))
        return;

    /* clip / convert to intra 8x8 type MVs */
    for(mb_row=0; mb_row<s->mb_height; mb_row++){
        int blk_xy= mb_row*2*wrap;       /* top left 8x8 block of the MB */
        int mb_xy = mb_row*s->mb_stride; /* macroblock index */
        int mb_col;

        for(mb_col=0; mb_col<s->mb_width; mb_col++, blk_xy+=2, mb_xy++){
            int block;

            if(!(s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V))
                continue;

            for(block=0; block<4; block++){
                const int pos= blk_xy + (block& 1) + (block>>1)*wrap;
                const int mx= s->current_picture.motion_val[0][pos][0];
                const int my= s->current_picture.motion_val[0][pos][1];
                const int fits=    mx < range && mx >= -range
                                && my < range && my >= -range;

                if(!fits){
                    s->mb_type[mb_xy] &= ~CANDIDATE_MB_TYPE_INTER4V;
                    s->mb_type[mb_xy] |= CANDIDATE_MB_TYPE_INTRA;
                    s->current_picture.mb_type[mb_xy]= CANDIDATE_MB_TYPE_INTRA;
                }
            }
        }
    }
}
1971

    
1972
/**
1973
 *
1974
 * @param truncate 1 for truncation, 0 for using intra
1975
 */
1976
void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
1977
                     int16_t (*mv_table)[2], int f_code, int type, int truncate)
1978
{
1979
    MotionEstContext * const c= &s->me;
1980
    int y, h_range, v_range;
1981

    
1982
    // RAL: 8 in MPEG-1, 16 in MPEG-4
1983
    int range = (((s->out_format == FMT_MPEG1 || s->msmpeg4_version) ? 8 : 16) << f_code);
1984

    
1985
    if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
1986

    
1987
    h_range= range;
1988
    v_range= field_select_table ? range>>1 : range;
1989

    
1990
    /* clip / convert to intra 16x16 type MVs */
1991
    for(y=0; y<s->mb_height; y++){
1992
        int x;
1993
        int xy= y*s->mb_stride;
1994
        for(x=0; x<s->mb_width; x++){
1995
            if (s->mb_type[xy] & type){    // RAL: "type" test added...
1996
                if(field_select_table==NULL || field_select_table[xy] == field_select){
1997
                    if(   mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
1998
                       || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){
1999

    
2000
                        if(truncate){
2001
                            if     (mv_table[xy][0] > h_range-1) mv_table[xy][0]=  h_range-1;
2002
                            else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
2003
                            if     (mv_table[xy][1] > v_range-1) mv_table[xy][1]=  v_range-1;
2004
                            else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
2005
                        }else{
2006
                            s->mb_type[xy] &= ~type;
2007
                            s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
2008
                            mv_table[xy][0]=
2009
                            mv_table[xy][1]= 0;
2010
                        }
2011
                    }
2012
                }
2013
            }
2014
            xy++;
2015
        }
2016
    }
2017
}