Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est_template.c @ 5509bffa

History | View | Annotate | Download (37.8 KB)

1
/*
2
 * Motion estimation
3
 * Copyright (c) 2002-2004 Michael Niedermayer
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
 *
19
 */
20

    
21
/**
22
 * @file motion_est_template.c
23
 * Motion estimation template.
24
 */
25

    
26
/*
 * Declares local copies of the MotionEstContext fields used throughout this
 * file. A MotionEstContext pointer named `c` must be in scope where the macro
 * is expanded. Locals that a given function may never read are tagged
 * attribute_unused; the original author notes that gcc 3.2.2 seems to drop
 * the unused ones.
 */
#define LOAD_COMMON                                              \
    uint32_t attribute_unused * const score_map = c->score_map;  \
    const int attribute_unused xmin = c->xmin;                   \
    const int attribute_unused ymin = c->ymin;                   \
    const int attribute_unused xmax = c->xmax;                   \
    const int attribute_unused ymax = c->ymax;                   \
    uint8_t *mv_penalty = c->current_mv_penalty;                 \
    const int pred_x = c->pred_x;                                \
    const int pred_y = c->pred_y;
36

    
37
/*
 * Scores the half-pel position (2*x + dx, 2*y + dy): calls cmp() with the
 * sub-pel comparison functions, adds the vector penalty for that position and
 * hands the result to COPY3_IF_LT together with dmin/bx/by.
 * NOTE(review): COPY3_IF_LT is defined outside this file; presumably it
 * updates dmin, bx and by when d is lower -- confirm.
 * Expects in scope: s, d, dmin, bx, by, size, h, ref_index, src_index,
 * cmp_sub, chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y, penalty_factor.
 */
#define CHECK_HALF_MV(dx, dy, x, y)                                             \
{                                                                               \
    const int hx = 2 * (x) + (dx);                                              \
    const int hy = 2 * (y) + (dy);                                              \
    d  = cmp(s, x, y, dx, dy, size, h, ref_index, src_index,                    \
             cmp_sub, chroma_cmp_sub, flags);                                   \
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]) * penalty_factor;  \
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)                                        \
}
45

    
46
#if 0
47
static int hpel_motion_search)(MpegEncContext * s,
48
                                  int *mx_ptr, int *my_ptr, int dmin,
49
                                  uint8_t *ref_data[3],
50
                                  int size)
51
{
52
    const int xx = 16 * s->mb_x + 8*(n&1);
53
    const int yy = 16 * s->mb_y + 8*(n>>1);
54
    const int mx = *mx_ptr;
55
    const int my = *my_ptr;
56
    const int penalty_factor= c->sub_penalty_factor;
57

58
    LOAD_COMMON
59

60
 //   INIT;
61
 //FIXME factorize
62
    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
63

64
    if(s->no_rounding /*FIXME b_type*/){
65
        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
66
        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
67
    }else{
68
        hpel_put=& s->dsp.put_pixels_tab[size];
69
        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
70
    }
71
    cmpf= s->dsp.me_cmp[size];
72
    chroma_cmpf= s->dsp.me_cmp[size+1];
73
    cmp_sub= s->dsp.me_sub_cmp[size];
74
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
75

76
    if(c->skip){ //FIXME somehow move up (benchmark)
77
        *mx_ptr = 0;
78
        *my_ptr = 0;
79
        return dmin;
80
    }
81

82
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
83
        CMP_HPEL(dmin, 0, 0, mx, my, size);
84
        if(mx || my)
85
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
86
    }
87

88
    if (mx > xmin && mx < xmax &&
89
        my > ymin && my < ymax) {
90
        int bx=2*mx, by=2*my;
91
        int d= dmin;
92

93
        CHECK_HALF_MV(1, 1, mx-1, my-1)
94
        CHECK_HALF_MV(0, 1, mx  , my-1)
95
        CHECK_HALF_MV(1, 1, mx  , my-1)
96
        CHECK_HALF_MV(1, 0, mx-1, my  )
97
        CHECK_HALF_MV(1, 0, mx  , my  )
98
        CHECK_HALF_MV(1, 1, mx-1, my  )
99
        CHECK_HALF_MV(0, 1, mx  , my  )
100
        CHECK_HALF_MV(1, 1, mx  , my  )
101

102
        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
103

104
        *mx_ptr = bx;
105
        *my_ptr = by;
106
    }else{
107
        *mx_ptr =2*mx;
108
        *my_ptr =2*my;
109
    }
110

111
    return dmin;
112
}
113

114
#else
115
static int hpel_motion_search(MpegEncContext * s,
116
                                  int *mx_ptr, int *my_ptr, int dmin,
117
                                  int src_index, int ref_index,
118
                                  int size, int h)
119
{
120
    MotionEstContext * const c= &s->me;
121
    const int mx = *mx_ptr;
122
    const int my = *my_ptr;
123
    const int penalty_factor= c->sub_penalty_factor;
124
    me_cmp_func cmp_sub, chroma_cmp_sub;
125
    int bx=2*mx, by=2*my;
126

    
127
    LOAD_COMMON
128
    int flags= c->sub_flags;
129

    
130
 //FIXME factorize
131

    
132
    cmp_sub= s->dsp.me_sub_cmp[size];
133
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
134

    
135
    if(c->skip){ //FIXME move out of hpel?
136
        *mx_ptr = 0;
137
        *my_ptr = 0;
138
        return dmin;
139
    }
140

    
141
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
142
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
143
        if(mx || my || size>0)
144
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
145
    }
146

    
147
    if (mx > xmin && mx < xmax &&
148
        my > ymin && my < ymax) {
149
        int d= dmin;
150
        const int index= (my<<ME_MAP_SHIFT) + mx;
151
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
152
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
153
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
154
                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
155
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
156
                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
157
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
158
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
159

    
160
#if 1
161
        int key;
162
        int map_generation= c->map_generation;
163
#ifndef NDEBUG
164
        uint32_t *map= c->map;
165
#endif
166
        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
167
        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
168
        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
169
        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
170
        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
171
        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
172
        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
173
        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
174
#endif
175
        if(t<=b){
176
            CHECK_HALF_MV(0, 1, mx  ,my-1)
177
            if(l<=r){
178
                CHECK_HALF_MV(1, 1, mx-1, my-1)
179
                if(t+r<=b+l){
180
                    CHECK_HALF_MV(1, 1, mx  , my-1)
181
                }else{
182
                    CHECK_HALF_MV(1, 1, mx-1, my  )
183
                }
184
                CHECK_HALF_MV(1, 0, mx-1, my  )
185
            }else{
186
                CHECK_HALF_MV(1, 1, mx  , my-1)
187
                if(t+l<=b+r){
188
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
189
                }else{
190
                    CHECK_HALF_MV(1, 1, mx  , my  )
191
                }
192
                CHECK_HALF_MV(1, 0, mx  , my  )
193
            }
194
        }else{
195
            if(l<=r){
196
                if(t+l<=b+r){
197
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
198
                }else{
199
                    CHECK_HALF_MV(1, 1, mx  , my  )
200
                }
201
                CHECK_HALF_MV(1, 0, mx-1, my)
202
                CHECK_HALF_MV(1, 1, mx-1, my)
203
            }else{
204
                if(t+r<=b+l){
205
                    CHECK_HALF_MV(1, 1, mx  , my-1)
206
                }else{
207
                    CHECK_HALF_MV(1, 1, mx-1, my)
208
                }
209
                CHECK_HALF_MV(1, 0, mx  , my)
210
                CHECK_HALF_MV(1, 1, mx  , my)
211
            }
212
            CHECK_HALF_MV(0, 1, mx  , my)
213
        }
214
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
215
    }
216

    
217
    *mx_ptr = bx;
218
    *my_ptr = by;
219

    
220
    return dmin;
221
}
222
#endif
223

    
224
/**
 * Sub-pel "search" that performs no refinement: it only doubles the full-pel
 * vector in place and returns dmin unchanged.
 * The vector is doubled with a multiplication rather than a left shift,
 * because the components may be negative and shifting a negative value left
 * is undefined behaviour.
 * All other parameters are unused; they exist so the signature matches the
 * other sub-pel search functions in this file.
 */
static int no_sub_motion_search(MpegEncContext * s,
          int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    *mx_ptr *= 2;
    *my_ptr *= 2;
    return dmin;
}
233

    
234
/**
 * Scores the vector (mx, my) with the macroblock comparison functions
 * (s->dsp.mb_cmp) and the mb flags / mb penalty factor of the context.
 *
 * The vector is split into an integer part (mx >> (qpel+1)) and a fractional
 * part (mx & mask) before being passed to cmp().
 *
 * @param add_rate if non-zero, the vector penalty is added to the score,
 *                 except for a (0,0) vector with size 0
 * @return the resulting score
 */
int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                               int ref_index, int size, int h, int add_rate)
{
    MotionEstContext * const c = &s->me;
    const int penalty_factor   = c->mb_penalty_factor;
    const int flags            = c->mb_flags;
    const int qpel             = flags & FLAG_QPEL;
    const int frac_mask        = 2 * qpel + 1;
    const int full_shift       = qpel + 1;
    me_cmp_func luma_fn        = s->dsp.mb_cmp[size];
    me_cmp_func chroma_fn      = s->dsp.mb_cmp[size + 1];
    int score;

    LOAD_COMMON

    score = cmp(s, mx >> full_shift, my >> full_shift,
                mx & frac_mask, my & frac_mask,
                size, h, ref_index, src_index, luma_fn, chroma_fn, flags);

    /* FIXME check cbp before adding penalty for (0,0) vector */
    if (!add_rate)
        return score;
    if (!mx && !my && size <= 0)
        return score;

    score += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y]) * penalty_factor;
    return score;
}
263

    
264
/*
 * Scores the quarter-pel position (4*x + dx, 4*y + dy): calls cmp() with
 * cmpf/chroma_cmpf, adds the vector penalty for that position and hands the
 * result to COPY3_IF_LT together with dmin/bx/by.
 * NOTE(review): COPY3_IF_LT is defined outside this file; presumably it
 * updates dmin, bx and by when d is lower -- confirm.
 * Expects in scope: s, d, dmin, bx, by, size, h, ref_index, src_index,
 * cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y, penalty_factor.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)                                          \
{                                                                               \
    const int hx = 4 * (x) + (dx);                                              \
    const int hy = 4 * (y) + (dy);                                              \
    d  = cmp(s, x, y, dx, dy, size, h, ref_index, src_index,                    \
             cmpf, chroma_cmpf, flags);                                         \
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]) * penalty_factor;  \
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)                                        \
}
272

    
273
/**
 * Quarter-pel refinement of the full-pel vector (*mx_ptr, *my_ptr).
 *
 * If c->skip is set, the vector is forced to (0,0) and dmin is returned as
 * passed in. Otherwise, when the full-pel and sub-pel comparison settings
 * differ, dmin is first recomputed at the full-pel position with the sub-pel
 * comparison function. If the vector lies strictly inside the search range,
 * a score is estimated for every quarter-pel offset in [-3,3]x[-3,3] from
 * the full-pel scores around the vector; the 8 lowest estimates are kept in
 * ascending order and the first me_subpel_quality of them (at most 8) are
 * then evaluated with CHECK_QUARTER_MV.
 *
 * @param mx_ptr in: full-pel x; out: best x in quarter-pel units
 * @param my_ptr in: full-pel y; out: best y in quarter-pel units
 * @param dmin   score of the incoming vector
 * @return dmin after refinement
 */
static int qpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int mx = *mx_ptr;
    const int my = *my_ptr;
    const int penalty_factor= c->sub_penalty_factor;
    const int map_generation= c->map_generation;
    const int subpel_quality= c->avctx->me_subpel_quality;
    uint32_t *map= c->map;
    me_cmp_func cmpf, chroma_cmpf;
    me_cmp_func cmp_sub, chroma_cmp_sub;

    LOAD_COMMON
    int flags= c->sub_flags;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
 //FIXME factorize

    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

    if(c->skip){ //FIXME somehow move up (benchmark)
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
        if(mx || my || size>0)
            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
    }

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int bx=4*mx, by=4*my;
        int d= dmin;
        int i, nx, ny, num_checks;
        /* multiplication instead of <<: my may be negative and left-shifting
         * a negative value is undefined */
        const int index= my*(1<<ME_MAP_SHIFT) + mx;
        /* full-pel scores: top, left, right, bottom and centre */
        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int mid= score_map[(index                  )&(ME_MAP_SIZE-1)];
        int best[8];        /* lowest estimated scores, ascending */
        int best_pos[8][2]; /* matching positions in quarter-pel units */

        /* every byte set to 64 gives each entry a large positive start score */
        memset(best, 64, sizeof(int)*8);
        /* start every slot at the centre so that a slot which never receives
         * a candidate is still a valid position when evaluated below */
        for(i=0; i<8; i++){
            best_pos[i][0]= bx;
            best_pos[i][1]= by;
        }

        if(c->dia_size>=2){
            /* the diagonal neighbours are read straight from score_map */
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
                    const int64_t c2= nx*nx*( r +  l - 2*mid) + 4*nx*( r- l) + 32*mid;
                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;

                    /* with nx, ny in [-3,3] this skips only the centre */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    /* sorted insert; the last entry falls off the end */
                    for(i=0; i<8; i++){
                        if(score < best[i]){
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                            best[i]= score;
                            best_pos[i][0]= nx + 4*mx;
                            best_pos[i][1]= ny + 4*my;
                            break;
                        }
                    }
                }
            }
        }else{
            int tl;
            //FIXME this could overflow (unlikely though)
            const int cx = 4*(r - l);
            const int cx2= r + l - 2*mid;
            const int cy = 4*(b - t);
            const int cy2= b + t - 2*mid;
            int cxy;

            /* The "&& 0" disables reuse of the cached top-left score, so tl
             * is always recomputed with cmp().
             * NOTE(review): the key compared here is built from (mx,my) while
             * the slot looked up belongs to (mx-1,my-1) -- check before
             * re-enabling. */
            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == ((unsigned)my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            }else{
                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
            }

            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*mid;

            assert(16*cx2 + 4*cx + 32*mid == 32*r);
            assert(16*cx2 - 4*cx + 32*mid == 32*l);
            assert(16*cy2 + 4*cy + 32*mid == 32*b);
            assert(16*cy2 - 4*cy + 32*mid == 32*t);
            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*mid == 32*tl);

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*mid; //FIXME factor

                    /* with nx, ny in [-3,3] this skips only the centre */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    /* sorted insert; the last entry falls off the end */
                    for(i=0; i<8; i++){
                        if(score < best[i]){
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
                            best[i]= score;
                            best_pos[i][0]= nx + 4*mx;
                            best_pos[i][1]= ny + 4*my;
                            break;
                        }
                    }
                }
            }
        }

        /* best_pos[] has only 8 entries: never read past them, whatever
         * value me_subpel_quality was configured to */
        num_checks= FFMIN(subpel_quality, (int)(sizeof(best)/sizeof(best[0])));
        for(i=0; i<num_checks; i++){
            nx= best_pos[i][0];
            ny= best_pos[i][1];
            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
        }

        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);

        *mx_ptr = bx;
        *my_ptr = by;
    }else{
        *mx_ptr =4*mx;
        *my_ptr =4*my;
    }

    return dmin;
}
495

    
496

    
497
/*
 * Evaluates the full-pel candidate (x, y) unless map[] already holds its key
 * for the current map generation. On evaluation the raw cmp() score is stored
 * in score_map[], the key in map[], and the score plus vector penalty is
 * handed to COPY3_IF_LT together with dmin/best[0]/best[1].
 * NOTE(review): COPY3_IF_LT is defined outside this file; presumably it
 * updates dmin and best[] when d is lower -- confirm.
 * The key and index use unsigned shifts and the penalty index a
 * multiplication, because x and y may be negative and left-shifting a
 * negative value is undefined; the stored bit patterns are unchanged.
 * Expects in scope: s, d, dmin, best, map, score_map, map_generation,
 * xmin/xmax/ymin/ymax, size, h, ref_index, src_index, cmpf, chroma_cmpf,
 * flags, shift, mv_penalty, pred_x, pred_y, penalty_factor.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key= ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
515

    
516
/*
 * Clamps the candidate (ax, ay) to [xmin, xmax] x [ymin, ymax] and passes
 * the clamped position to CHECK_MV. Each argument is evaluated once.
 */
#define CHECK_CLIPED_MV(ax, ay)                          \
{                                                        \
    const int x  = ax;                                   \
    const int y  = ay;                                   \
    const int x2 = FFMAX(xmin, FFMIN(x, xmax));          \
    const int y2 = FFMAX(ymin, FFMIN(y, ymax));          \
    CHECK_MV(x2, y2)                                     \
}
524

    
525
/*
 * Like CHECK_MV but without the range asserts: evaluates (x, y) unless map[]
 * already holds its key for the current generation, and when the score plus
 * vector penalty beats dmin it updates best[], dmin and sets next_dir to
 * new_dir.
 * The key and index use unsigned shifts and the penalty index a
 * multiplication, because x and y may be negative and left-shifting a
 * negative value is undefined; the stored bit patterns are unchanged.
 * Expects in scope: s, d, dmin, best, next_dir, map, score_map,
 * map_generation, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
 * shift, mv_penalty, pred_x, pred_y, penalty_factor.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key= ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}
544

    
545
/*
 * Debug helper: prints a message for each bound of the search range
 * (scaled by 1<<S) that the position (x, y) violates. `v` is stringified
 * and appended to the message. Expands to four unbraced if statements, so it
 * must not be used as the body of another unbraced if/else.
 * Expects in scope: s, xmin, xmax, ymin, ymax.
 */
#define check(x, y, S, v)                                                                          \
if ((x) < (xmin << (S))) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);       \
if ((x) > (xmax << (S))) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);       \
if ((y) < (ymin << (S))) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);       \
if ((y) > (ymax << (S))) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);
550

    
551
/*
 * Declares the locals needed by the CHECK_MV family: the visited-position
 * map, the qpel flag extracted from `flags`, and the shift (1 + qpel) applied
 * to full-pel coordinates when indexing mv_penalty.
 * Expects `c` (MotionEstContext pointer) and `flags` in scope.
 */
#define LOAD_COMMON2                        \
    uint32_t *map   = c->map;               \
    const int qpel  = flags & FLAG_QPEL;    \
    const int shift = 1 + qpel;
555

    
556
/**
 * Diamond search of radius 1 starting at best[].
 *
 * First makes sure the starting point itself has an entry in map/score_map.
 * Then repeatedly tests the left, upper, right and lower neighbour of the
 * current best position (skipping the one that would step straight back and
 * any outside the search range) until a full round brings no improvement.
 *
 * @param best in: start position; out: best position found (full-pel)
 * @param dmin score of the start position
 * @return the best score found
 */
static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int next_dir=-1;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
        /* unsigned shifts: best[1] may be negative and left-shifting a
         * negative value is undefined; the resulting bit pattern is the same */
        const unsigned key= ((unsigned)best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
        const int index= (((unsigned)best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
        if(map[index]!=key){ //this will be executed only very rarely
            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
            map[index]= key;
        }
    }

    for(;;){
        int d;
        const int dir= next_dir; /* direction of the previous step, -1 if none */
        const int x= best[0];
        const int y= best[1];
        next_dir=-1;

        /* direction codes: 0 = left, 1 = up, 2 = right, 3 = down */
        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)

        if(next_dir==-1){
            return dmin;
        }
    }
}
597

    
598
/**
 * Diamond search over the power-of-two radii 1, 2 and 4.
 *
 * For each radius whose diamond fits entirely inside the search range, every
 * second point along each of the four diamond edges is tested with CHECK_MV.
 * Whenever a test moves best[], the radius restarts at 1.
 *
 * @param best in: start position; out: best position found (full-pel)
 * @param dmin score of the start position
 * @return the best score found
 */
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c = &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation = c->map_generation;

    cmpf        = s->dsp.me_cmp[size];
    chroma_cmpf = s->dsp.me_cmp[size + 1];

    for (dia_size = 1; dia_size <= 4; dia_size++) {
        const int x0 = best[0];
        const int y0 = best[1];
        int step;
        int d;

        /* only radii that are a power of two */
        if (dia_size & (dia_size - 1))
            continue;

        /* the whole diamond has to lie inside the search range */
        if (!(x0 + dia_size <= xmax && x0 - dia_size >= xmin &&
              y0 + dia_size <= ymax && y0 - dia_size >= ymin))
            continue;

        for (step = 0; step < dia_size; step += 2) {
            CHECK_MV(x0 + step           , y0 + dia_size - step);
            CHECK_MV(x0 + dia_size - step, y0 - step           );
            CHECK_MV(x0 - step           , y0 - dia_size + step);
            CHECK_MV(x0 - dia_size + step, y0 + step           );
        }

        /* a better point was found: the loop increment brings the radius
         * back to 1 */
        if (best[0] != x0 || best[1] != y0)
            dia_size = 0;
    }

    return dmin;
}
658

    
659
/*
 * Evaluates the candidate (ax, ay) unless map[] already holds its key for the
 * current generation. If its score plus vector penalty is lower than the last
 * entry of the sorted minima[] list, the candidate is inserted at its sorted
 * position (the last entry is dropped), i is set to -1 and `continue` is
 * executed, so the enclosing for loop over i starts again from index 0.
 * Must therefore be expanded directly inside that loop.
 * The key and index use unsigned shifts and the penalty index a
 * multiplication, because ax and ay may be negative and left-shifting a
 * negative value is undefined; the stored bit patterns are unchanged.
 * Expects in scope: s, d, i, minima, minima_count, map, score_map,
 * map_generation, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
 * shift, mv_penalty, pred_x, pred_y, penalty_factor.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key= ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
687

    
688
#define MAX_SAB_SIZE ME_MAP_SIZE
689
static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
690
                                       int src_index, int ref_index, int const penalty_factor,
691
                                       int size, int h, int flags)
692
{
693
    MotionEstContext * const c= &s->me;
694
    me_cmp_func cmpf, chroma_cmpf;
695
    Minima minima[MAX_SAB_SIZE];
696
    const int minima_count= ABS(c->dia_size);
697
    int i, j;
698
    LOAD_COMMON
699
    LOAD_COMMON2
700
    int map_generation= c->map_generation;
701

    
702
    cmpf= s->dsp.me_cmp[size];
703
    chroma_cmpf= s->dsp.me_cmp[size+1];
704

    
705
    for(j=i=0; i<ME_MAP_SIZE; i++){
706
        uint32_t key= map[i];
707

    
708
        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
709

    
710
        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
711

    
712
        assert(j<MAX_SAB_SIZE); //max j = number of predictors
713

    
714
        minima[j].height= score_map[i];
715
        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
716
        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
717
        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
718
        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
719
        minima[j].checked=0;
720
        if(minima[j].x || minima[j].y)
721
            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
722

    
723
        j++;
724
    }
725

    
726
    qsort(minima, j, sizeof(Minima), minima_cmp);
727

    
728
    for(; j<minima_count; j++){
729
        minima[j].height=256*256*256*64;
730
        minima[j].checked=0;
731
        minima[j].x= minima[j].y=0;
732
    }
733

    
734
    for(i=0; i<minima_count; i++){
735
        const int x= minima[i].x;
736
        const int y= minima[i].y;
737
        int d;
738

    
739
        if(minima[i].checked) continue;
740

    
741
        if(   x >= xmax || x <= xmin
742
           || y >= ymax || y <= ymin)
743
           continue;
744

    
745
        SAB_CHECK_MV(x-1, y)
746
        SAB_CHECK_MV(x+1, y)
747
        SAB_CHECK_MV(x  , y-1)
748
        SAB_CHECK_MV(x  , y+1)
749

    
750
        minima[i].checked= 1;
751
    }
752

    
753
    best[0]= minima[0].x;
754
    best[1]= minima[0].y;
755
    dmin= minima[0].height;
756

    
757
    if(   best[0] < xmax && best[0] > xmin
758
       && best[1] < ymax && best[1] > ymin){
759
        int d;
760
        //ensure that the refernece samples for hpel refinement are in the map
761
        CHECK_MV(best[0]-1, best[1])
762
        CHECK_MV(best[0]+1, best[1])
763
        CHECK_MV(best[0], best[1]-1)
764
        CHECK_MV(best[0], best[1]+1)
765
    }
766
    return dmin;
767
}
768

    
769
/**
 * Diamond search with growing radius, from 1 up to c->dia_size.
 *
 * For each radius the four edges of the diamond around the current best
 * position are walked; the start/end of each walk is clipped so that only
 * points inside the search range are tested with CHECK_MV. Whenever a test
 * moves best[], the radius restarts at 1.
 *
 * @param best in: start position; out: best position found (full-pel)
 * @param dmin score of the start position
 * @return the best score found
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c = &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation = c->map_generation;

    cmpf        = s->dsp.me_cmp[size];
    chroma_cmpf = s->dsp.me_cmp[size + 1];

    for (dia_size = 1; dia_size <= c->dia_size; dia_size++) {
        const int x0 = best[0];
        const int y0 = best[1];
        int k, first, stop;
        int d;

        /* edge from (x0, y0 + dia_size) towards (x0 + dia_size, y0) */
        first = FFMAX(0, y0 + dia_size - ymax);
        stop  = FFMIN(dia_size, xmax - x0 + 1);
        for (k = first; k < stop; k++) {
            CHECK_MV(x0 + k           , y0 + dia_size - k);
        }

        /* edge from (x0 + dia_size, y0) towards (x0, y0 - dia_size) */
        first = FFMAX(0, x0 + dia_size - xmax);
        stop  = FFMIN(dia_size, y0 - ymin + 1);
        for (k = first; k < stop; k++) {
            CHECK_MV(x0 + dia_size - k, y0 - k           );
        }

        /* edge from (x0, y0 - dia_size) towards (x0 - dia_size, y0) */
        first = FFMAX(0, -y0 + dia_size + ymin );
        stop  = FFMIN(dia_size, x0 - xmin + 1);
        for (k = first; k < stop; k++) {
            CHECK_MV(x0 - k           , y0 - dia_size + k);
        }

        /* edge from (x0 - dia_size, y0) towards (x0, y0 + dia_size) */
        first = FFMAX(0, -x0 + dia_size + xmin );
        stop  = FFMIN(dia_size, ymax - y0 + 1);
        for (k = first; k < stop; k++) {
            CHECK_MV(x0 - dia_size + k, y0 + k           );
        }

        /* a better point was found: the loop increment brings the radius
         * back to 1 */
        if (best[0] != x0 || best[1] != y0)
            dia_size = 0;
    }

    return dmin;
}
845

    
846
/**
 * Select and run one of the diamond search variants according to
 * s->me.dia_size:
 *   dia_size <  -1  -> sab_diamond_search()
 *   dia_size == -1  -> funny_diamond_search()
 *   dia_size <   2  -> small_diamond_search()   (i.e. 0 or 1)
 *   otherwise       -> var_diamond_search()
 *
 * All arguments are forwarded unchanged to the selected variant.
 *
 * @return the value returned by the selected search function
 */
static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags){
    const int dia= s->me.dia_size;

    if(dia < -1)
        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia == -1)
        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia < 2)
        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
859

    
860
/**
 * Predictor-driven motion search: scores a series of candidate vectors and
 * then refines the best one with diamond_search().
 *
 * Candidates are evaluated through the CHECK_MV / CHECK_CLIPED_MV macros,
 * which are defined outside this function; they presumably update d, dmin
 * and best[] and refer to the locals declared below by name (including those
 * introduced by LOAD_COMMON and, presumably, map and shift from LOAD_COMMON2).
 *
 * @param s            encoder context; s->me is the motion estimation state
 * @param mx_ptr       receives best[0] (x component of the chosen vector)
 * @param my_ptr       receives best[1] (y component of the chosen vector)
 * @param P            predictor vectors, read through the P_LEFT, P_TOP,
 *                     P_TOPRIGHT and P_MEDIAN macros
 * @param src_index    forwarded to cmp() and diamond_search()
 * @param ref_index    forwarded to cmp() and diamond_search()
 * @param last_mv      table of vectors indexed by mb_x + mb_y*s->mb_stride,
 *                     used as additional candidates
 * @param ref_mv_scale factor applied to last_mv entries as
 *                     (v*ref_mv_scale + (1<<15))>>16
 * @param flags        forwarded to cmp() and diamond_search()
 * @param size         selects the comparison functions ([size] and [size+1])
 * @param h            forwarded to cmp(); also scales the dmin thresholds
 * @return the score returned by diamond_search(), or the zero-vector score
 *         when the early skip path is taken
 */
static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
861
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
862
                             int ref_mv_scale, int flags, int size, int h)
863
{
864
    MotionEstContext * const c= &s->me;
865
    int best[2]={0, 0};
866
    int d, dmin;
867
    int map_generation;
868
    int penalty_factor;
869
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
870
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv before passing FIXME
871
    me_cmp_func cmpf, chroma_cmpf;
872

    
873
    LOAD_COMMON
874
    LOAD_COMMON2
875

    
876
    /* the pre-pass uses its own penalty factor and comparison functions */
    if(c->pre_pass){
877
        penalty_factor= c->pre_penalty_factor;
878
        cmpf= s->dsp.me_pre_cmp[size];
879
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
880
    }else{
881
        penalty_factor= c->penalty_factor;
882
        cmpf= s->dsp.me_cmp[size];
883
        chroma_cmpf= s->dsp.me_cmp[size+1];
884
    }
885

    
886
    map_generation= update_map_generation(c);
887

    
888
    assert(cmpf);
889
    /* score the zero vector first and record it in slot 0 of map/score_map */
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
890
    map[0]= map_generation;
891
    score_map[0]= dmin;
892

    
893
    /* first slice line: only the left predictor and the last_mv entry at the
       current macroblock position are tried */
894
    if (s->first_slice_line) {
895
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
896
        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
897
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
898
    }else{
899
        /* early skip: the zero vector scores below h*h and the left, top and
           top-right predictors are all zero -> report (0,0) and set c->skip */
        if(dmin<h*h && ( P_LEFT[0]    |P_LEFT[1]
900
                        |P_TOP[0]     |P_TOP[1]
901
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
902
            *mx_ptr= 0;
903
            *my_ptr= 0;
904
            c->skip=1;
905
            return dmin;
906
        }
907
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
908
        /* the remaining predictors are tried only while the best score so far
           is above 2*h*h */
        if(dmin>h*h*2){
909
            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
910
                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
911
            CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
912
            CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
913
            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
914
        }
915
    }
916
    /* best score still above 4*h*h: try neighbouring last_mv entries
       (index -1 and -stride in the pre-pass, +1 and +stride otherwise) */
    if(dmin>h*h*4){
917
        if(c->pre_pass){
918
            CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
919
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
920
            if(!s->first_slice_line)
921
                CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
922
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
923
        }else{
924
            CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
925
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
926
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
927
                CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
928
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
929
        }
930
    }
931

    
932
    /* optional extra candidates: last_mv entries from a window of +-count
       macroblocks around the current one, clipped to the picture size */
    if(c->avctx->last_predictor_count){
933
        const int count= c->avctx->last_predictor_count;
934
        const int xstart= FFMAX(0, s->mb_x - count);
935
        const int ystart= FFMAX(0, s->mb_y - count);
936
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
937
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
938
        int mb_y;
939

    
940
        for(mb_y=ystart; mb_y<yend; mb_y++){
941
            int mb_x;
942
            for(mb_x=xstart; mb_x<xend; mb_x++){
943
                // NOTE(review): this index is shifted by one column and one row
                // compared with how ref_mv_xy is computed above -- confirm intended
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
944
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
945
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
946

    
947
                // vectors outside [xmin,xmax] x [ymin,ymax] are skipped, not clipped
                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
948
                CHECK_MV(mx,my)
949
            }
950
        }
951
    }
952

    
953
//check(best[0],best[1],0, b0)
954
    /* refine the best candidate found so far */
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
955

    
956
//check(best[0],best[1],0, b1)
957
    *mx_ptr= best[0];
958
    *my_ptr= best[1];
959

    
960
//    printf("%d %d %d \n", best[0], best[1], dmin);
961
    return dmin;
962
}
963

    
964
//this function is dedicated to the braindamaged gcc
965
inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
966
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
967
                             int ref_mv_scale, int size, int h)
968
{
969
    MotionEstContext * const c= &s->me;
970
//FIXME convert other functions in the same way if faster
971
    if(c->flags==0 && h==16 && size==0){
972
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
973
//    case FLAG_QPEL:
974
//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
975
    }else{
976
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
977
    }
978
}
979

    
980
/**
 * Predictor-based motion search with fixed size=1 and h=8
 * (presumably the 8x8-block / 4MV variant -- TODO confirm against callers).
 *
 * Unlike epzs_motion_search_internal() the zero vector is not scored first:
 * dmin starts at 1000000 and only predictor candidates are tested before the
 * result is refined with diamond_search(). Candidates go through the
 * CHECK_MV / CHECK_CLIPED_MV macros defined outside this function, which
 * presumably update d, dmin and best[] using the locals declared here.
 *
 * @param s            encoder context; s->me is the motion estimation state
 * @param mx_ptr       receives best[0]
 * @param my_ptr       receives best[1]
 * @param P            predictor vectors, read through the P_LEFT, P_TOP,
 *                     P_TOPRIGHT, P_MEDIAN and P_MV1 macros
 * @param src_index    forwarded to diamond_search()
 * @param ref_index    forwarded to diamond_search()
 * @param last_mv      table of vectors indexed by mb_x + mb_y*s->mb_stride
 * @param ref_mv_scale factor applied to last_mv entries as
 *                     (v*ref_mv_scale + (1<<15))>>16
 * @return the score returned by diamond_search()
 */
static int epzs_motion_search4(MpegEncContext * s,
981
                             int *mx_ptr, int *my_ptr, int P[10][2],
982
                             int src_index, int ref_index, int16_t (*last_mv)[2],
983
                             int ref_mv_scale)
984
{
985
    MotionEstContext * const c= &s->me;
986
    int best[2]={0, 0};
987
    int d, dmin;
988
    int map_generation;
989
    const int penalty_factor= c->penalty_factor;
990
    const int size=1;
991
    const int h=8;
992
    const int ref_mv_stride= s->mb_stride;
993
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
994
    me_cmp_func cmpf, chroma_cmpf;
995
    LOAD_COMMON
996
    int flags= c->flags;
997
    LOAD_COMMON2
998

    
999
    cmpf= s->dsp.me_cmp[size];
1000
    chroma_cmpf= s->dsp.me_cmp[size+1];
1001

    
1002
    map_generation= update_map_generation(c);
1003

    
1004
    /* start from a large sentinel score instead of the zero-vector score */
    dmin = 1000000;
1005
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1006
    /* first slice line: left predictor, last_mv at the current position, P_MV1 */
1007
    if (s->first_slice_line) {
1008
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1009
        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1010
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1011
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1012
    }else{
1013
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1014
        //FIXME try some early stop
1015
        /* other predictors are tried only while the best score exceeds 64*2 */
        if(dmin>64*2){
1016
            CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1017
            CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1018
            CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1019
            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1020
            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1021
                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1022
        }
1023
    }
1024
    /* still above 64*4: try the last_mv entries at index +1 and +stride */
    if(dmin>64*4){
1025
        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1026
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1027
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1028
            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1029
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1030
    }
1031

    
1032
    /* refine the best candidate found so far */
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1033

    
1034
    *mx_ptr= best[0];
1035
    *my_ptr= best[1];
1036

    
1037
//    printf("%d %d %d \n", best[0], best[1], dmin);
1038
    return dmin;
1039
}
1040

    
1041
//try to merge with above FIXME (needs PSNR test)
1042
/**
 * Predictor-based motion search with fixed size=0 and h=8
 * (presumably a 16x8 variant, e.g. for field blocks -- TODO confirm against
 * callers). Apart from the value of size the body matches
 * epzs_motion_search4().
 *
 * dmin starts at 1000000, predictor candidates are tested through the
 * CHECK_MV / CHECK_CLIPED_MV macros (defined outside this function, presumably
 * updating d, dmin and best[] using the locals declared here), and the result
 * is refined with diamond_search().
 *
 * @param s            encoder context; s->me is the motion estimation state
 * @param mx_ptr       receives best[0]
 * @param my_ptr       receives best[1]
 * @param P            predictor vectors, read through the P_LEFT, P_TOP,
 *                     P_TOPRIGHT, P_MEDIAN and P_MV1 macros
 * @param src_index    forwarded to diamond_search()
 * @param ref_index    forwarded to diamond_search()
 * @param last_mv      table of vectors indexed by mb_x + mb_y*s->mb_stride
 * @param ref_mv_scale factor applied to last_mv entries as
 *                     (v*ref_mv_scale + (1<<15))>>16
 * @return the score returned by diamond_search()
 */
static int epzs_motion_search2(MpegEncContext * s,
1043
                             int *mx_ptr, int *my_ptr, int P[10][2],
1044
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1045
                             int ref_mv_scale)
1046
{
1047
    MotionEstContext * const c= &s->me;
1048
    int best[2]={0, 0};
1049
    int d, dmin;
1050
    int map_generation;
1051
    const int penalty_factor= c->penalty_factor;
1052
    const int size=0; //FIXME pass as arg
1053
    const int h=8;
1054
    const int ref_mv_stride= s->mb_stride;
1055
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1056
    me_cmp_func cmpf, chroma_cmpf;
1057
    LOAD_COMMON
1058
    int flags= c->flags;
1059
    LOAD_COMMON2
1060

    
1061
    cmpf= s->dsp.me_cmp[size];
1062
    chroma_cmpf= s->dsp.me_cmp[size+1];
1063

    
1064
    map_generation= update_map_generation(c);
1065

    
1066
    /* start from a large sentinel score instead of the zero-vector score */
    dmin = 1000000;
1067
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1068
    /* first slice line: left predictor, last_mv at the current position, P_MV1 */
1069
    if (s->first_slice_line) {
1070
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1071
        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1072
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1073
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1074
    }else{
1075
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1076
        //FIXME try some early stop
1077
        /* other predictors are tried only while the best score exceeds 64*2 */
        if(dmin>64*2){
1078
            CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1079
            CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1080
            CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1081
            CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1082
            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1083
                            (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1084
        }
1085
    }
1086
    /* still above 64*4: try the last_mv entries at index +1 and +stride */
    if(dmin>64*4){
1087
        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1088
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1089
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1090
            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1091
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1092
    }
1093

    
1094
    /* refine the best candidate found so far */
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1095

    
1096
    *mx_ptr= best[0];
1097
    *my_ptr= best[1];
1098

    
1099
//    printf("%d %d %d \n", best[0], best[1], dmin);
1100
    return dmin;
1101
}