Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est_template.c @ bb21f176

History | View | Annotate | Download (43.5 KB)

/*
 * Motion estimation
 * Copyright (c) 2002-2004 Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 */

    
/**
 * @file motion_est_template.c
 * Motion estimation template.
 */

    
/*
 * Declares the locals shared by the search routines in this file, loading
 * them from a pointer named `c` that must be in scope at the expansion site:
 * the score map, the allowed search window (xmin..xmax, ymin..ymax), the
 * motion-vector penalty table and the predicted vector (pred_x, pred_y).
 *
 * Not every user needs every variable, hence attribute_unused; let's hope
 * gcc will remove the unused vars (gcc 3.2.2 seems to do it).
 */
#define LOAD_COMMON\
    uint32_t attribute_unused * const score_map= c->score_map;\
    const int attribute_unused xmin= c->xmin;\
    const int attribute_unused ymin= c->ymin;\
    const int attribute_unused xmax= c->xmax;\
    const int attribute_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;

    
/*
 * Scores one half-pel candidate: full-pel position (x, y) plus half-pel
 * offset (dx, dy). The score from cmp() (using the sub-pel compare functions)
 * plus the motion-vector penalty is stored in d and then passed to
 * COPY3_IF_LT together with dmin/bx/by (defined elsewhere; presumably it
 * records d and the half-pel coordinates when d < dmin).
 *
 * Expects s, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags,
 * mv_penalty, pred_x, pred_y, penalty_factor, d, dmin, bx and by in scope.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hx= 2*(x)+(dx);\
    const int hy= 2*(y)+(dy);\
    d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}

    
48
#if 0
49
static int hpel_motion_search)(MpegEncContext * s,
50
                                  int *mx_ptr, int *my_ptr, int dmin,
51
                                  uint8_t *ref_data[3],
52
                                  int size)
53
{
54
    const int xx = 16 * s->mb_x + 8*(n&1);
55
    const int yy = 16 * s->mb_y + 8*(n>>1);
56
    const int mx = *mx_ptr;
57
    const int my = *my_ptr;
58
    const int penalty_factor= c->sub_penalty_factor;
59

60
    LOAD_COMMON
61

62
 //   INIT;
63
 //FIXME factorize
64
    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
65

66
    if(s->no_rounding /*FIXME b_type*/){
67
        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
68
        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
69
    }else{
70
        hpel_put=& s->dsp.put_pixels_tab[size];
71
        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
72
    }
73
    cmpf= s->dsp.me_cmp[size];
74
    chroma_cmpf= s->dsp.me_cmp[size+1];
75
    cmp_sub= s->dsp.me_sub_cmp[size];
76
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
77

78
    if(c->skip){ //FIXME somehow move up (benchmark)
79
        *mx_ptr = 0;
80
        *my_ptr = 0;
81
        return dmin;
82
    }
83

84
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
85
        CMP_HPEL(dmin, 0, 0, mx, my, size);
86
        if(mx || my)
87
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
88
    }
89

90
    if (mx > xmin && mx < xmax &&
91
        my > ymin && my < ymax) {
92
        int bx=2*mx, by=2*my;
93
        int d= dmin;
94

95
        CHECK_HALF_MV(1, 1, mx-1, my-1)
96
        CHECK_HALF_MV(0, 1, mx  , my-1)
97
        CHECK_HALF_MV(1, 1, mx  , my-1)
98
        CHECK_HALF_MV(1, 0, mx-1, my  )
99
        CHECK_HALF_MV(1, 0, mx  , my  )
100
        CHECK_HALF_MV(1, 1, mx-1, my  )
101
        CHECK_HALF_MV(0, 1, mx  , my  )
102
        CHECK_HALF_MV(1, 1, mx  , my  )
103

104
        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
105

106
        *mx_ptr = bx;
107
        *my_ptr = by;
108
    }else{
109
        *mx_ptr =2*mx;
110
        *my_ptr =2*my;
111
    }
112

113
    return dmin;
114
}
115

116
#else
117
static int hpel_motion_search(MpegEncContext * s,
118
                                  int *mx_ptr, int *my_ptr, int dmin,
119
                                  int src_index, int ref_index,
120
                                  int size, int h)
121
{
122
    MotionEstContext * const c= &s->me;
123
    const int mx = *mx_ptr;
124
    const int my = *my_ptr;
125
    const int penalty_factor= c->sub_penalty_factor;
126
    me_cmp_func cmp_sub, chroma_cmp_sub;
127
    int bx=2*mx, by=2*my;
128

    
129
    LOAD_COMMON
130
    int flags= c->sub_flags;
131

    
132
 //FIXME factorize
133

    
134
    cmp_sub= s->dsp.me_sub_cmp[size];
135
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
136

    
137
    if(c->skip){ //FIXME move out of hpel?
138
        *mx_ptr = 0;
139
        *my_ptr = 0;
140
        return dmin;
141
    }
142

    
143
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
144
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
145
        if(mx || my || size>0)
146
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
147
    }
148

    
149
    if (mx > xmin && mx < xmax &&
150
        my > ymin && my < ymax) {
151
        int d= dmin;
152
        const int index= (my<<ME_MAP_SHIFT) + mx;
153
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
154
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
155
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
156
                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
157
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
158
                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
159
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
160
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
161

    
162
#if 1
163
        int key;
164
        int map_generation= c->map_generation;
165
#ifndef NDEBUG
166
        uint32_t *map= c->map;
167
#endif
168
        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
169
        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
170
        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
171
        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
172
        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
173
        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
174
        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
175
        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
176
#endif
177
        if(t<=b){
178
            CHECK_HALF_MV(0, 1, mx  ,my-1)
179
            if(l<=r){
180
                CHECK_HALF_MV(1, 1, mx-1, my-1)
181
                if(t+r<=b+l){
182
                    CHECK_HALF_MV(1, 1, mx  , my-1)
183
                }else{
184
                    CHECK_HALF_MV(1, 1, mx-1, my  )
185
                }
186
                CHECK_HALF_MV(1, 0, mx-1, my  )
187
            }else{
188
                CHECK_HALF_MV(1, 1, mx  , my-1)
189
                if(t+l<=b+r){
190
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
191
                }else{
192
                    CHECK_HALF_MV(1, 1, mx  , my  )
193
                }
194
                CHECK_HALF_MV(1, 0, mx  , my  )
195
            }
196
        }else{
197
            if(l<=r){
198
                if(t+l<=b+r){
199
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
200
                }else{
201
                    CHECK_HALF_MV(1, 1, mx  , my  )
202
                }
203
                CHECK_HALF_MV(1, 0, mx-1, my)
204
                CHECK_HALF_MV(1, 1, mx-1, my)
205
            }else{
206
                if(t+r<=b+l){
207
                    CHECK_HALF_MV(1, 1, mx  , my-1)
208
                }else{
209
                    CHECK_HALF_MV(1, 1, mx-1, my)
210
                }
211
                CHECK_HALF_MV(1, 0, mx  , my)
212
                CHECK_HALF_MV(1, 1, mx  , my)
213
            }
214
            CHECK_HALF_MV(0, 1, mx  , my)
215
        }
216
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
217
    }
218

    
219
    *mx_ptr = bx;
220
    *my_ptr = by;
221

    
222
    return dmin;
223
}
224
#endif
225

    
226
static int no_sub_motion_search(MpegEncContext * s,
227
          int *mx_ptr, int *my_ptr, int dmin,
228
                                  int src_index, int ref_index,
229
                                  int size, int h)
230
{
231
    (*mx_ptr)<<=1;
232
    (*my_ptr)<<=1;
233
    return dmin;
234
}
235

    
236
inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
237
                               int ref_index, int size, int h, int add_rate)
238
{
239
//    const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
240
    MotionEstContext * const c= &s->me;
241
    const int penalty_factor= c->mb_penalty_factor;
242
    const int flags= c->mb_flags;
243
    const int qpel= flags & FLAG_QPEL;
244
    const int mask= 1+2*qpel;
245
    me_cmp_func cmp_sub, chroma_cmp_sub;
246
    int d;
247

    
248
    LOAD_COMMON
249

    
250
 //FIXME factorize
251

    
252
    cmp_sub= s->dsp.mb_cmp[size];
253
    chroma_cmp_sub= s->dsp.mb_cmp[size+1];
254

    
255
//    assert(!c->skip);
256
//    assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
257

    
258
    d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
259
    //FIXME check cbp before adding penalty for (0,0) vector
260
    if(add_rate && (mx || my || size>0))
261
        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
262

    
263
    return d;
264
}
265

    
/*
 * Scores one quarter-pel candidate: full-pel position (x, y) plus
 * quarter-pel offset (dx, dy). The score from cmp() (using cmpf/chroma_cmpf)
 * plus the motion-vector penalty is stored in d and then passed to
 * COPY3_IF_LT together with dmin/bx/by (defined elsewhere; presumably it
 * records d and the quarter-pel coordinates when d < dmin).
 *
 * Expects s, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
 * mv_penalty, pred_x, pred_y, penalty_factor, d, dmin, bx and by in scope.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int hx= 4*(x)+(dx);\
    const int hy= 4*(y)+(dy);\
    d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}

    
275
static int qpel_motion_search(MpegEncContext * s,
276
                                  int *mx_ptr, int *my_ptr, int dmin,
277
                                  int src_index, int ref_index,
278
                                  int size, int h)
279
{
280
    MotionEstContext * const c= &s->me;
281
    const int mx = *mx_ptr;
282
    const int my = *my_ptr;
283
    const int penalty_factor= c->sub_penalty_factor;
284
    const int map_generation= c->map_generation;
285
    const int subpel_quality= c->avctx->me_subpel_quality;
286
    uint32_t *map= c->map;
287
    me_cmp_func cmpf, chroma_cmpf;
288
    me_cmp_func cmp_sub, chroma_cmp_sub;
289

    
290
    LOAD_COMMON
291
    int flags= c->sub_flags;
292

    
293
    cmpf= s->dsp.me_cmp[size];
294
    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
295
 //FIXME factorize
296

    
297
    cmp_sub= s->dsp.me_sub_cmp[size];
298
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
299

    
300
    if(c->skip){ //FIXME somehow move up (benchmark)
301
        *mx_ptr = 0;
302
        *my_ptr = 0;
303
        return dmin;
304
    }
305

    
306
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
307
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
308
        if(mx || my || size>0)
309
            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
310
    }
311

    
312
    if (mx > xmin && mx < xmax &&
313
        my > ymin && my < ymax) {
314
        int bx=4*mx, by=4*my;
315
        int d= dmin;
316
        int i, nx, ny;
317
        const int index= (my<<ME_MAP_SHIFT) + mx;
318
        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
319
        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
320
        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
321
        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
322
        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
323
        int best[8];
324
        int best_pos[8][2];
325

    
326
        memset(best, 64, sizeof(int)*8);
327
#if 1
328
        if(s->me.dia_size>=2){
329
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
330
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
331
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
332
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
333

    
334
            for(ny= -3; ny <= 3; ny++){
335
                for(nx= -3; nx <= 3; nx++){
336
                    //FIXME this could overflow (unlikely though)
337
                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
338
                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
339
                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
340
                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
341
                    int i;
342

    
343
                    if((nx&3)==0 && (ny&3)==0) continue;
344

    
345
                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
346

    
347
//                    if(nx&1) score-=1024*c->penalty_factor;
348
//                    if(ny&1) score-=1024*c->penalty_factor;
349

    
350
                    for(i=0; i<8; i++){
351
                        if(score < best[i]){
352
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
353
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
354
                            best[i]= score;
355
                            best_pos[i][0]= nx + 4*mx;
356
                            best_pos[i][1]= ny + 4*my;
357
                            break;
358
                        }
359
                    }
360
                }
361
            }
362
        }else{
363
            int tl;
364
            //FIXME this could overflow (unlikely though)
365
            const int cx = 4*(r - l);
366
            const int cx2= r + l - 2*c;
367
            const int cy = 4*(b - t);
368
            const int cy2= b + t - 2*c;
369
            int cxy;
370

    
371
            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
372
                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
373
            }else{
374
                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
375
            }
376

    
377
            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
378

    
379
            assert(16*cx2 + 4*cx + 32*c == 32*r);
380
            assert(16*cx2 - 4*cx + 32*c == 32*l);
381
            assert(16*cy2 + 4*cy + 32*c == 32*b);
382
            assert(16*cy2 - 4*cy + 32*c == 32*t);
383
            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
384

    
385
            for(ny= -3; ny <= 3; ny++){
386
                for(nx= -3; nx <= 3; nx++){
387
                    //FIXME this could overflow (unlikely though)
388
                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
389
                    int i;
390

    
391
                    if((nx&3)==0 && (ny&3)==0) continue;
392

    
393
                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
394
//                    if(nx&1) score-=32*c->penalty_factor;
395
  //                  if(ny&1) score-=32*c->penalty_factor;
396

    
397
                    for(i=0; i<8; i++){
398
                        if(score < best[i]){
399
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
400
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
401
                            best[i]= score;
402
                            best_pos[i][0]= nx + 4*mx;
403
                            best_pos[i][1]= ny + 4*my;
404
                            break;
405
                        }
406
                    }
407
                }
408
            }
409
        }
410
        for(i=0; i<subpel_quality; i++){
411
            nx= best_pos[i][0];
412
            ny= best_pos[i][1];
413
            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
414
        }
415

    
416
#if 0
417
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
418
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
419
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
420
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
421
//            if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
422
            if(tl<br){
423

424
//            nx= FFMAX(4*mx - bx, bx - 4*mx);
425
//            ny= FFMAX(4*my - by, by - 4*my);
426

427
            static int stats[7][7], count;
428
            count++;
429
            stats[4*mx - bx + 3][4*my - by + 3]++;
430
            if(256*256*256*64 % count ==0){
431
                for(i=0; i<49; i++){
432
                    if((i%7)==0) printf("\n");
433
                    printf("%6d ", stats[0][i]);
434
                }
435
                printf("\n");
436
            }
437
            }
438
#endif
439
#else
440

    
441
        CHECK_QUARTER_MV(2, 2, mx-1, my-1)
442
        CHECK_QUARTER_MV(0, 2, mx  , my-1)
443
        CHECK_QUARTER_MV(2, 2, mx  , my-1)
444
        CHECK_QUARTER_MV(2, 0, mx  , my  )
445
        CHECK_QUARTER_MV(2, 2, mx  , my  )
446
        CHECK_QUARTER_MV(0, 2, mx  , my  )
447
        CHECK_QUARTER_MV(2, 2, mx-1, my  )
448
        CHECK_QUARTER_MV(2, 0, mx-1, my  )
449

    
450
        nx= bx;
451
        ny= by;
452

    
453
        for(i=0; i<8; i++){
454
            int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
455
            int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
456
            CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
457
        }
458
#endif
459
#if 0
460
        //outer ring
461
        CHECK_QUARTER_MV(1, 3, mx-1, my-1)
462
        CHECK_QUARTER_MV(1, 2, mx-1, my-1)
463
        CHECK_QUARTER_MV(1, 1, mx-1, my-1)
464
        CHECK_QUARTER_MV(2, 1, mx-1, my-1)
465
        CHECK_QUARTER_MV(3, 1, mx-1, my-1)
466
        CHECK_QUARTER_MV(0, 1, mx  , my-1)
467
        CHECK_QUARTER_MV(1, 1, mx  , my-1)
468
        CHECK_QUARTER_MV(2, 1, mx  , my-1)
469
        CHECK_QUARTER_MV(3, 1, mx  , my-1)
470
        CHECK_QUARTER_MV(3, 2, mx  , my-1)
471
        CHECK_QUARTER_MV(3, 3, mx  , my-1)
472
        CHECK_QUARTER_MV(3, 0, mx  , my  )
473
        CHECK_QUARTER_MV(3, 1, mx  , my  )
474
        CHECK_QUARTER_MV(3, 2, mx  , my  )
475
        CHECK_QUARTER_MV(3, 3, mx  , my  )
476
        CHECK_QUARTER_MV(2, 3, mx  , my  )
477
        CHECK_QUARTER_MV(1, 3, mx  , my  )
478
        CHECK_QUARTER_MV(0, 3, mx  , my  )
479
        CHECK_QUARTER_MV(3, 3, mx-1, my  )
480
        CHECK_QUARTER_MV(2, 3, mx-1, my  )
481
        CHECK_QUARTER_MV(1, 3, mx-1, my  )
482
        CHECK_QUARTER_MV(1, 2, mx-1, my  )
483
        CHECK_QUARTER_MV(1, 1, mx-1, my  )
484
        CHECK_QUARTER_MV(1, 0, mx-1, my  )
485
#endif
486
        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
487

    
488
        *mx_ptr = bx;
489
        *my_ptr = by;
490
    }else{
491
        *mx_ptr =4*mx;
492
        *my_ptr =4*my;
493
    }
494

    
495
    return dmin;
496
}
497

    
498

    
/*
 * Scores the full-pel vector (x, y) unless the map already holds it for the
 * current generation. On a miss the raw score is cached in score_map, the
 * motion-vector penalty is added, and the result is passed to COPY3_IF_LT
 * together with dmin/best[0]/best[1] (defined elsewhere; presumably it
 * records the vector when d < dmin).
 *
 * (x, y) must lie inside the search window (asserted). Coordinates may be
 * negative, so scaling uses multiplication rather than a left shift, which
 * would be undefined behaviour for negative values.
 *
 * Expects map, score_map, map_generation, shift, d, dmin, best and the
 * LOAD_COMMON locals in scope.
 */
#define CHECK_MV(x,y)\
{\
    const int key= ((y)*(1<<ME_MAP_MV_BITS)) + (x) + map_generation;\
    const int index= (((y)*(1<<ME_MAP_SHIFT)) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
/*printf("check_mv %d %d\n", x, y);*/\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
/*printf("score:%d\n", d);*/\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}

    
/*
 * Like CHECK_MV, but first clamps (ax, ay) into the search window
 * [xmin, xmax] x [ymin, ymax]. Each argument is evaluated exactly once.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int Lx= ax;\
    const int Ly= ay;\
    const int Lx2= FFMAX(xmin, FFMIN(Lx, xmax));\
    const int Ly2= FFMAX(ymin, FFMIN(Ly, ymax));\
    CHECK_MV(Lx2, Ly2)\
}

    
/*
 * Variant of CHECK_MV that also records the direction of an improvement:
 * when the uncached vector (x, y) scores below dmin, best[], dmin and
 * next_dir (set to new_dir) are updated. Unlike CHECK_MV it does not assert
 * that (x, y) is inside the search window.
 *
 * Coordinates may be negative, so scaling uses multiplication rather than a
 * left shift, which would be undefined behaviour for negative values.
 *
 * Expects map, score_map, map_generation, shift, d, dmin, best, next_dir and
 * the LOAD_COMMON locals in scope.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const int key= ((y)*(1<<ME_MAP_MV_BITS)) + (x) + map_generation;\
    const int index= (((y)*(1<<ME_MAP_SHIFT)) + (x))&(ME_MAP_SIZE-1);\
/*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
/*printf("score:%d\n", d);*/\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}

    
/*
 * Debug helper: prints a diagnostic for each bound of the search window that
 * the vector (x, y) violates. The window limits are scaled by 2^S so that
 * sub-pel vectors can be checked; v is a tag appended to the message.
 * Expands to four separate if-statements, so do not use it as the unbraced
 * body of another control statement.
 *
 * The limits may be negative, so they are scaled by multiplication rather
 * than a left shift (undefined behaviour for negative values).
 */
#define check(x,y,S,v)\
if( (x)<(xmin*(1<<(S))) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
if( (x)>(xmax*(1<<(S))) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
if( (y)<(ymin*(1<<(S))) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
if( (y)>(ymax*(1<<(S))) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);

    
/*
 * Additional locals for the full-pel search routines: the map of already
 * evaluated vectors, whether FLAG_QPEL is set in `flags`, and the derived
 * `shift` used to scale full-pel coordinates for the mv_penalty lookup.
 * Requires `c` and `flags` in scope at the expansion site.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;

    
558
static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
559
                                       int src_index, int ref_index, int const penalty_factor,
560
                                       int size, int h, int flags)
561
{
562
    MotionEstContext * const c= &s->me;
563
    me_cmp_func cmpf, chroma_cmpf;
564
    int next_dir=-1;
565
    LOAD_COMMON
566
    LOAD_COMMON2
567
    int map_generation= c->map_generation;
568

    
569
    cmpf= s->dsp.me_cmp[size];
570
    chroma_cmpf= s->dsp.me_cmp[size+1];
571

    
572
    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
573
        const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
574
        const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
575
        if(map[index]!=key){ //this will be executed only very rarey
576
            score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
577
            map[index]= key;
578
        }
579
    }
580

    
581
    for(;;){
582
        int d;
583
        const int dir= next_dir;
584
        const int x= best[0];
585
        const int y= best[1];
586
        next_dir=-1;
587

    
588
//printf("%d", dir);
589
        if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y  , 0)
590
        if(dir!=3 && y>ymin) CHECK_MV_DIR(x  , y-1, 1)
591
        if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y  , 2)
592
        if(dir!=1 && y<ymax) CHECK_MV_DIR(x  , y+1, 3)
593

    
594
        if(next_dir==-1){
595
            return dmin;
596
        }
597
    }
598
}
599

    
600
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
601
                                       int src_index, int ref_index, int const penalty_factor,
602
                                       int size, int h, int flags)
603
{
604
    MotionEstContext * const c= &s->me;
605
    me_cmp_func cmpf, chroma_cmpf;
606
    int dia_size;
607
    LOAD_COMMON
608
    LOAD_COMMON2
609
    int map_generation= c->map_generation;
610

    
611
    cmpf= s->dsp.me_cmp[size];
612
    chroma_cmpf= s->dsp.me_cmp[size+1];
613

    
614
    for(dia_size=1; dia_size<=4; dia_size++){
615
        int dir;
616
        const int x= best[0];
617
        const int y= best[1];
618

    
619
        if(dia_size&(dia_size-1)) continue;
620

    
621
        if(   x + dia_size > xmax
622
           || x - dia_size < xmin
623
           || y + dia_size > ymax
624
           || y - dia_size < ymin)
625
           continue;
626

    
627
        for(dir= 0; dir<dia_size; dir+=2){
628
            int d;
629

    
630
            CHECK_MV(x + dir           , y + dia_size - dir);
631
            CHECK_MV(x + dia_size - dir, y - dir           );
632
            CHECK_MV(x - dir           , y - dia_size + dir);
633
            CHECK_MV(x - dia_size + dir, y + dir           );
634
        }
635

    
636
        if(x!=best[0] || y!=best[1])
637
            dia_size=0;
638
#if 0
639
{
640
int dx, dy, i;
641
static int stats[8*8];
642
dx= FFABS(x-best[0]);
643
dy= FFABS(y-best[1]);
644
if(dy>dx){
645
    dx^=dy; dy^=dx; dx^=dy;
646
}
647
stats[dy*8 + dx] ++;
648
if(256*256*256*64 % (stats[0]+1)==0){
649
    for(i=0; i<64; i++){
650
        if((i&7)==0) printf("\n");
651
        printf("%8d ", stats[i]);
652
    }
653
    printf("\n");
654
}
655
}
656
#endif
657
    }
658
    return dmin;
659
}
660

    
661
static int hex_search(MpegEncContext * s, int *best, int dmin,
662
                                       int src_index, int ref_index, int const penalty_factor,
663
                                       int size, int h, int flags, int dia_size)
664
{
665
    MotionEstContext * const c= &s->me;
666
    me_cmp_func cmpf, chroma_cmpf;
667
    LOAD_COMMON
668
    LOAD_COMMON2
669
    int map_generation= c->map_generation;
670
    int x,y,d;
671
    const int dec= dia_size & (dia_size-1);
672

    
673
    cmpf= s->dsp.me_cmp[size];
674
    chroma_cmpf= s->dsp.me_cmp[size+1];
675

    
676
    for(;dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
677
        do{
678
            x= best[0];
679
            y= best[1];
680

    
681
            CHECK_CLIPPED_MV(x  -dia_size    , y);
682
            CHECK_CLIPPED_MV(x+  dia_size    , y);
683
            CHECK_CLIPPED_MV(x+( dia_size>>1), y+dia_size);
684
            CHECK_CLIPPED_MV(x+( dia_size>>1), y-dia_size);
685
            if(dia_size>1){
686
                CHECK_CLIPPED_MV(x+(-dia_size>>1), y+dia_size);
687
                CHECK_CLIPPED_MV(x+(-dia_size>>1), y-dia_size);
688
            }
689
        }while(best[0] != x || best[1] != y);
690
    }
691

    
692
    return dmin;
693
}
694

    
695
static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
696
                                       int src_index, int ref_index, int const penalty_factor,
697
                                       int size, int h, int flags)
698
{
699
    MotionEstContext * const c= &s->me;
700
    me_cmp_func cmpf, chroma_cmpf;
701
    LOAD_COMMON
702
    LOAD_COMMON2
703
    int map_generation= c->map_generation;
704
    int x,y,i,d;
705
    int dia_size= c->dia_size&0xFF;
706
    const int dec= dia_size & (dia_size-1);
707
    static const int hex[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
708
                                { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};
709

    
710
    cmpf= s->dsp.me_cmp[size];
711
    chroma_cmpf= s->dsp.me_cmp[size+1];
712

    
713
    for(; dia_size; dia_size= dec ? dia_size-1 : dia_size>>1){
714
        do{
715
            x= best[0];
716
            y= best[1];
717
            for(i=0; i<8; i++){
718
                CHECK_CLIPPED_MV(x+hex[i][0]*dia_size, y+hex[i][1]*dia_size);
719
            }
720
        }while(best[0] != x || best[1] != y);
721
    }
722

    
723
    x= best[0];
724
    y= best[1];
725
    CHECK_CLIPPED_MV(x+1, y);
726
    CHECK_CLIPPED_MV(x, y+1);
727
    CHECK_CLIPPED_MV(x-1, y);
728
    CHECK_CLIPPED_MV(x, y-1);
729

    
730
    return dmin;
731
}
732

    
733
static int umh_search(MpegEncContext * s, int *best, int dmin,
734
                                       int src_index, int ref_index, int const penalty_factor,
735
                                       int size, int h, int flags)
736
{
737
    MotionEstContext * const c= &s->me;
738
    me_cmp_func cmpf, chroma_cmpf;
739
    LOAD_COMMON
740
    LOAD_COMMON2
741
    int map_generation= c->map_generation;
742
    int x,y,x2,y2, i, j, d;
743
    const int dia_size= c->dia_size&0xFE;
744
    static const int hex[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
745
                                 { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
746
                                 {-2, 3}, { 0, 4}, { 2, 3},
747
                                 {-2,-3}, { 0,-4}, { 2,-3},};
748

    
749
    cmpf= s->dsp.me_cmp[size];
750
    chroma_cmpf= s->dsp.me_cmp[size+1];
751

    
752
    x= best[0];
753
    y= best[1];
754
    for(x2=FFMAX(x-dia_size+1, xmin); x2<=FFMIN(x+dia_size-1,xmax); x2+=2){
755
        CHECK_MV(x2, y);
756
    }
757
    for(y2=FFMAX(y-dia_size/2+1, ymin); y2<=FFMIN(y+dia_size/2-1,ymax); y2+=2){
758
        CHECK_MV(x, y2);
759
    }
760

    
761
    x= best[0];
762
    y= best[1];
763
    for(y2=FFMAX(y-2, ymin); y2<=FFMIN(y+2,ymax); y2++){
764
        for(x2=FFMAX(x-2, xmin); x2<=FFMIN(x+2,xmax); x2++){
765
            CHECK_MV(x2, y2);
766
        }
767
    }
768

    
769
//FIXME prevent the CLIP stuff
770

    
771
    for(j=1; j<=dia_size/4; j++){
772
        for(i=0; i<16; i++){
773
            CHECK_CLIPPED_MV(x+hex[i][0]*j, y+hex[i][1]*j);
774
        }
775
    }
776

    
777
    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
778
}
779

    
/*
 * Scores the full-pel vector (ax, ay) unless the map already holds it for
 * the current generation. If the new score beats the last entry of the
 * minima[] list (minima_count entries, ordered by ascending height), it is
 * inserted at its sorted position, the last entry is dropped, and the
 * enclosing loop is restarted.
 *
 * The expansion contains `i=-1; continue;`, so it must be used directly
 * inside a loop whose index is i. The inner while loop terminates because d
 * is known to be below minima[minima_count-1].height.
 *
 * Coordinates may be negative, so scaling uses multiplication rather than a
 * left shift, which would be undefined behaviour for negative values.
 *
 * Expects map, score_map, map_generation, shift, d, i, minima, minima_count
 * and the LOAD_COMMON locals in scope.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const int key= ((ay)*(1<<ME_MAP_MV_BITS)) + (ax) + map_generation;\
    const int index= (((ay)*(1<<ME_MAP_SHIFT)) + (ax))&(ME_MAP_SIZE-1);\
/*printf("sab check %d %d\n", ax, ay);*/\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
/*printf("score: %d\n", d);*/\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}

    
/* Capacity of the minima[] array used by sab_diamond_search(). */
#define MAX_SAB_SIZE ME_MAP_SIZE
810
static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
811
                                       int src_index, int ref_index, int const penalty_factor,
812
                                       int size, int h, int flags)
813
{
814
    MotionEstContext * const c= &s->me;
815
    me_cmp_func cmpf, chroma_cmpf;
816
    Minima minima[MAX_SAB_SIZE];
817
    const int minima_count= FFABS(c->dia_size);
818
    int i, j;
819
    LOAD_COMMON
820
    LOAD_COMMON2
821
    int map_generation= c->map_generation;
822

    
823
    cmpf= s->dsp.me_cmp[size];
824
    chroma_cmpf= s->dsp.me_cmp[size+1];
825

    
826
    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
827
      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
828
     */
829
    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
830
        uint32_t key= map[i];
831

    
832
        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
833

    
834
        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
835

    
836
        minima[j].height= score_map[i];
837
        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
838
        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
839
        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
840
        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
841

    
842
        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
843
        if(   minima[j].x > xmax || minima[j].x < xmin
844
           || minima[j].y > ymax || minima[j].y < ymin)
845
            continue;
846

    
847
        minima[j].checked=0;
848
        if(minima[j].x || minima[j].y)
849
            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
850

    
851
        j++;
852
    }
853

    
854
    qsort(minima, j, sizeof(Minima), minima_cmp);
855

    
856
    for(; j<minima_count; j++){
857
        minima[j].height=256*256*256*64;
858
        minima[j].checked=0;
859
        minima[j].x= minima[j].y=0;
860
    }
861

    
862
    for(i=0; i<minima_count; i++){
863
        const int x= minima[i].x;
864
        const int y= minima[i].y;
865
        int d;
866

    
867
        if(minima[i].checked) continue;
868

    
869
        if(   x >= xmax || x <= xmin
870
           || y >= ymax || y <= ymin)
871
           continue;
872

    
873
        SAB_CHECK_MV(x-1, y)
874
        SAB_CHECK_MV(x+1, y)
875
        SAB_CHECK_MV(x  , y-1)
876
        SAB_CHECK_MV(x  , y+1)
877

    
878
        minima[i].checked= 1;
879
    }
880

    
881
    best[0]= minima[0].x;
882
    best[1]= minima[0].y;
883
    dmin= minima[0].height;
884

    
885
    if(   best[0] < xmax && best[0] > xmin
886
       && best[1] < ymax && best[1] > ymin){
887
        int d;
888
        //ensure that the refernece samples for hpel refinement are in the map
889
        CHECK_MV(best[0]-1, best[1])
890
        CHECK_MV(best[0]+1, best[1])
891
        CHECK_MV(best[0], best[1]-1)
892
        CHECK_MV(best[0], best[1]+1)
893
    }
894
    return dmin;
895
}
896

    
897
/**
 * Diamond search with a growing radius.
 *
 * For dia_size = 1 .. c->dia_size the outline of a diamond of that radius
 * around the position held in best[] at the start of the pass is probed with
 * CHECK_MV. Each of the four edges is clipped against xmin/xmax/ymin/ymax.
 * When a pass has changed best[], dia_size is reset to 0 so that the next
 * pass starts over with radius 1 around the new position.
 *
 * @param best in: start position, out: position after the search
 * @param dmin score belonging to the initial best[]
 * @return dmin after all passes
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    for(dia_size=1; dia_size<=c->dia_size; dia_size++){
        /* centre of this pass; best[] itself may move while probing */
        const int cx= best[0];
        const int cy= best[1];
        int d, k, first, limit;

        /* edge running from (cx, cy+dia_size) towards (cx+dia_size, cy) */
        first= FFMAX(0, cy + dia_size - ymax);
        limit= FFMIN(dia_size, xmax - cx + 1);
        for(k=first; k<limit; k++){
            CHECK_MV(cx + k, cy + dia_size - k);
        }

        /* edge running from (cx+dia_size, cy) towards (cx, cy-dia_size) */
        first= FFMAX(0, cx + dia_size - xmax);
        limit= FFMIN(dia_size, cy - ymin + 1);
        for(k=first; k<limit; k++){
            CHECK_MV(cx + dia_size - k, cy - k);
        }

        /* edge running from (cx, cy-dia_size) towards (cx-dia_size, cy) */
        first= FFMAX(0, -cy + dia_size + ymin );
        limit= FFMIN(dia_size, cx - xmin + 1);
        for(k=first; k<limit; k++){
            CHECK_MV(cx - k, cy - dia_size + k);
        }

        /* edge running from (cx-dia_size, cy) towards (cx, cy+dia_size) */
        first= FFMAX(0, -cx + dia_size + xmin );
        limit= FFMIN(dia_size, ymax - cy + 1);
        for(k=first; k<limit; k++){
            CHECK_MV(cx - dia_size + k, cy + k);
        }

        /* best[] moved: restart with the smallest diamond around it */
        if(cx!=best[0] || cy!=best[1])
            dia_size=0;
    }
    return dmin;
}
973

    
974
/**
 * Pick the full-pel search pattern according to c->dia_size and run it.
 *
 *   dia_size == -1        funny_diamond_search
 *   dia_size <  -1        sab_diamond_search
 *   dia_size <   2        small_diamond_search
 *   dia_size > 768        umh_search
 *   dia_size > 512        hex_search, with dia_size&0xFF as last argument
 *   dia_size > 256        l2s_dia_search
 *   otherwise             var_diamond_search
 *
 * All other arguments are forwarded unchanged.
 * @return the value returned by the selected search function
 */
static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags){
    const int dia= s->me.dia_size;

    if(dia == -1)
        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia < -1)
        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia < 2)
        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia > 768)
        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia > 512)
        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, dia&0xFF);
    if(dia > 256)
        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    return           var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
993

    
994
/*!
995
   \param P[10][2] a list of candidate mvs to check before starting the
996
   iterative search. If one of the candidates is close to the optimal mv, then
997
   it takes fewer iterations. And it increases the chance that we find the
998
   optimal mv.
999
 */
1000
static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1001
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1002
                             int ref_mv_scale, int flags, int size, int h)
1003
{
1004
    MotionEstContext * const c= &s->me;
1005
    int best[2]={0, 0};      /*!< x and y coordinates of the best motion vector.
1006
                               i.e. the difference between the position of the
1007
                               block current being encoded and the position of
1008
                               the block chosen to predict it from. */
1009
    int d;                   ///< the score (cmp + penalty) of any given mv
1010
    int dmin;                /*!< the best value of d, i.e. the score
1011
                               corresponding to the mv stored in best[]. */
1012
    int map_generation;
1013
    int penalty_factor;
1014
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
1015
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
1016
    me_cmp_func cmpf, chroma_cmpf;
1017

    
1018
    LOAD_COMMON
1019
    LOAD_COMMON2
1020

    
1021
    if(c->pre_pass){
1022
        penalty_factor= c->pre_penalty_factor;
1023
        cmpf= s->dsp.me_pre_cmp[size];
1024
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
1025
    }else{
1026
        penalty_factor= c->penalty_factor;
1027
        cmpf= s->dsp.me_cmp[size];
1028
        chroma_cmpf= s->dsp.me_cmp[size+1];
1029
    }
1030

    
1031
    map_generation= update_map_generation(c);
1032

    
1033
    assert(cmpf);
1034
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
1035
    map[0]= map_generation;
1036
    score_map[0]= dmin;
1037

    
1038
    //FIXME precalc first term below?
1039
    if((s->pict_type == B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
1040
        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
1041

    
1042
    /* first line */
1043
    if (s->first_slice_line) {
1044
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1045
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1046
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1047
    }else{
1048
        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
1049
                    && ( P_LEFT[0]    |P_LEFT[1]
1050
                        |P_TOP[0]     |P_TOP[1]
1051
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
1052
            *mx_ptr= 0;
1053
            *my_ptr= 0;
1054
            c->skip=1;
1055
            return dmin;
1056
        }
1057
        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
1058
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
1059
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
1060
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
1061
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
1062
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1063
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1064
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
1065
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
1066
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1067
    }
1068
    if(dmin>h*h*4){
1069
        if(c->pre_pass){
1070
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
1071
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
1072
            if(!s->first_slice_line)
1073
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1074
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1075
        }else{
1076
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1077
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1078
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1079
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1080
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1081
        }
1082
    }
1083

    
1084
    if(c->avctx->last_predictor_count){
1085
        const int count= c->avctx->last_predictor_count;
1086
        const int xstart= FFMAX(0, s->mb_x - count);
1087
        const int ystart= FFMAX(0, s->mb_y - count);
1088
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
1089
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
1090
        int mb_y;
1091

    
1092
        for(mb_y=ystart; mb_y<yend; mb_y++){
1093
            int mb_x;
1094
            for(mb_x=xstart; mb_x<xend; mb_x++){
1095
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
1096
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
1097
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
1098

    
1099
                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
1100
                CHECK_MV(mx,my)
1101
            }
1102
        }
1103
    }
1104

    
1105
//check(best[0],best[1],0, b0)
1106
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1107

    
1108
//check(best[0],best[1],0, b1)
1109
    *mx_ptr= best[0];
1110
    *my_ptr= best[1];
1111

    
1112
//    printf("%d %d %d \n", best[0], best[1], dmin);
1113
    return dmin;
1114
}
1115

    
1116
//this function is dedicated to the braindamaged gcc
1117
inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1118
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1119
                             int ref_mv_scale, int size, int h)
1120
{
1121
    MotionEstContext * const c= &s->me;
1122
//FIXME convert other functions in the same way if faster
1123
    if(c->flags==0 && h==16 && size==0){
1124
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
1125
//    case FLAG_QPEL:
1126
//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
1127
    }else{
1128
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
1129
    }
1130
}
1131

    
1132
/**
 * Predictor based full-pel search with fixed size=1 and h=8.
 *
 * Starts from a dummy score of 1000000, tests the candidates from P[] and
 * last_mv[] (each last_mv value is scaled as
 * (v*ref_mv_scale + (1<<15))>>16) and refines with diamond_search().
 *
 * @param mx_ptr,my_ptr receive the x and y component of the chosen mv
 * @return score of the chosen mv
 */
static int epzs_motion_search4(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
                             int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale)
{
    MotionEstContext * const c= &s->me;
    int best[2]={0, 0};
    int d;
    int dmin= 1000000;
    int map_generation;
    const int penalty_factor= c->penalty_factor;
    const int size=1;
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    LOAD_COMMON
    int flags= c->flags;
    LOAD_COMMON2

    map_generation= update_map_generation(c);

    if(!s->first_slice_line){
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        CHECK_MV(P_MEDIAN[0]  >>shift, P_MEDIAN[1]  >>shift)
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    }else{
        /* first line */
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }

    /* result still poor: also try the last_mv entries at +1 and +stride */
    if(dmin>64*4){
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
        if(s->mb_y+1<s->end_mb_y){  //FIXME replace at least with last_slice_line
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
        }
    }

    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}
1190

    
1191
//try to merge with above FIXME (needs PSNR test)
1192
static int epzs_motion_search2(MpegEncContext * s,
1193
                             int *mx_ptr, int *my_ptr, int P[10][2],
1194
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1195
                             int ref_mv_scale)
1196
{
1197
    MotionEstContext * const c= &s->me;
1198
    int best[2]={0, 0};
1199
    int d, dmin;
1200
    int map_generation;
1201
    const int penalty_factor= c->penalty_factor;
1202
    const int size=0; //FIXME pass as arg
1203
    const int h=8;
1204
    const int ref_mv_stride= s->mb_stride;
1205
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1206
    me_cmp_func cmpf, chroma_cmpf;
1207
    LOAD_COMMON
1208
    int flags= c->flags;
1209
    LOAD_COMMON2
1210

    
1211
    cmpf= s->dsp.me_cmp[size];
1212
    chroma_cmpf= s->dsp.me_cmp[size+1];
1213

    
1214
    map_generation= update_map_generation(c);
1215

    
1216
    dmin = 1000000;
1217
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1218
    /* first line */
1219
    if (s->first_slice_line) {
1220
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1221
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1222
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1223
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1224
    }else{
1225
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1226
        //FIXME try some early stop
1227
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1228
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1229
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1230
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1231
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1232
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1233
    }
1234
    if(dmin>64*4){
1235
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1236
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1237
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1238
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1239
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1240
    }
1241

    
1242
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1243

    
1244
    *mx_ptr= best[0];
1245
    *my_ptr= best[1];
1246

    
1247
//    printf("%d %d %d \n", best[0], best[1], dmin);
1248
    return dmin;
1249
}