Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est_template.c @ ce5e49b0

History | View | Annotate | Download (43.9 KB)

1
/*
2
 * Motion estimation
3
 * Copyright (c) 2002-2004 Michael Niedermayer
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file
24
 * Motion estimation template.
25
 */
26

    
27
/*
 * LOAD_COMMON: declare local shortcuts for fields of the motion estimation
 * context. A variable named `c` must be in scope at the expansion site and
 * provide score_map, xmin, ymin, xmax, ymax, current_mv_penalty, pred_x and
 * pred_y through `c->`.
 *
 * Not every user reads every shortcut; those are tagged av_unused and the
 * compiler is expected to drop them (gcc 3.2.2 was observed to do so).
 *
 * The last line intentionally has no line continuation, so the macro ends
 * here and cannot absorb whatever line happens to follow the definition.
 */
#define LOAD_COMMON\
    uint32_t av_unused * const score_map= c->score_map;\
    const int av_unused xmin= c->xmin;\
    const int av_unused ymin= c->ymin;\
    const int av_unused xmax= c->xmax;\
    const int av_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;

    
38
/*
 * CHECK_HALF_MV(dx, dy, x, y): evaluate one half-pel candidate.
 * (x, y) is the integer position and (dx, dy) the half-pel offset added to
 * 2*x / 2*y. The candidate is scored with cmp_hpel() plus the motion vector
 * penalty, and COPY3_IF_LT() stores it in dmin/bx/by.
 * Relies on these names at the expansion site: s, d, dmin, bx, by, size, h,
 * ref_index, src_index, cmp_sub, chroma_cmp_sub, flags, mv_penalty, pred_x,
 * pred_y, penalty_factor.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int half_x= (dx) + 2*(x);\
    const int half_y= (dy) + 2*(y);\
    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += penalty_factor*(mv_penalty[half_x - pred_x] + mv_penalty[half_y - pred_y]);\
    COPY3_IF_LT(dmin, d, bx, half_x, by, half_y)\
}
46

    
47
#if 0
48
static int hpel_motion_search)(MpegEncContext * s,
49
                                  int *mx_ptr, int *my_ptr, int dmin,
50
                                  uint8_t *ref_data[3],
51
                                  int size)
52
{
53
    const int xx = 16 * s->mb_x + 8*(n&1);
54
    const int yy = 16 * s->mb_y + 8*(n>>1);
55
    const int mx = *mx_ptr;
56
    const int my = *my_ptr;
57
    const int penalty_factor= c->sub_penalty_factor;
58

59
    LOAD_COMMON
60

61
 //   INIT;
62
 //FIXME factorize
63
    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
64

65
    if(s->no_rounding /*FIXME b_type*/){
66
        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
67
        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
68
    }else{
69
        hpel_put=& s->dsp.put_pixels_tab[size];
70
        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
71
    }
72
    cmpf= s->dsp.me_cmp[size];
73
    chroma_cmpf= s->dsp.me_cmp[size+1];
74
    cmp_sub= s->dsp.me_sub_cmp[size];
75
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
76

77
    if(c->skip){ //FIXME somehow move up (benchmark)
78
        *mx_ptr = 0;
79
        *my_ptr = 0;
80
        return dmin;
81
    }
82

83
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
84
        CMP_HPEL(dmin, 0, 0, mx, my, size);
85
        if(mx || my)
86
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
87
    }
88

89
    if (mx > xmin && mx < xmax &&
90
        my > ymin && my < ymax) {
91
        int bx=2*mx, by=2*my;
92
        int d= dmin;
93

94
        CHECK_HALF_MV(1, 1, mx-1, my-1)
95
        CHECK_HALF_MV(0, 1, mx  , my-1)
96
        CHECK_HALF_MV(1, 1, mx  , my-1)
97
        CHECK_HALF_MV(1, 0, mx-1, my  )
98
        CHECK_HALF_MV(1, 0, mx  , my  )
99
        CHECK_HALF_MV(1, 1, mx-1, my  )
100
        CHECK_HALF_MV(0, 1, mx  , my  )
101
        CHECK_HALF_MV(1, 1, mx  , my  )
102

103
        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
104

105
        *mx_ptr = bx;
106
        *my_ptr = by;
107
    }else{
108
        *mx_ptr =2*mx;
109
        *my_ptr =2*my;
110
    }
111

112
    return dmin;
113
}
114

115
#else
116
static int hpel_motion_search(MpegEncContext * s,
117
                                  int *mx_ptr, int *my_ptr, int dmin,
118
                                  int src_index, int ref_index,
119
                                  int size, int h)
120
{
121
    MotionEstContext * const c= &s->me;
122
    const int mx = *mx_ptr;
123
    const int my = *my_ptr;
124
    const int penalty_factor= c->sub_penalty_factor;
125
    me_cmp_func cmp_sub, chroma_cmp_sub;
126
    int bx=2*mx, by=2*my;
127

    
128
    LOAD_COMMON
129
    int flags= c->sub_flags;
130

    
131
 //FIXME factorize
132

    
133
    cmp_sub= s->dsp.me_sub_cmp[size];
134
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
135

    
136
    if(c->skip){ //FIXME move out of hpel?
137
        *mx_ptr = 0;
138
        *my_ptr = 0;
139
        return dmin;
140
    }
141

    
142
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
143
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
144
        if(mx || my || size>0)
145
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
146
    }
147

    
148
    if (mx > xmin && mx < xmax &&
149
        my > ymin && my < ymax) {
150
        int d= dmin;
151
        const int index= (my<<ME_MAP_SHIFT) + mx;
152
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
153
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
154
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
155
                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
156
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
157
                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
158
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
159
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
160

    
161
#if 1
162
        int key;
163
        int map_generation= c->map_generation;
164
#ifndef NDEBUG
165
        uint32_t *map= c->map;
166
#endif
167
        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
168
        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
169
        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
170
        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
171
        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
172
        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
173
        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
174
        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
175
#endif
176
        if(t<=b){
177
            CHECK_HALF_MV(0, 1, mx  ,my-1)
178
            if(l<=r){
179
                CHECK_HALF_MV(1, 1, mx-1, my-1)
180
                if(t+r<=b+l){
181
                    CHECK_HALF_MV(1, 1, mx  , my-1)
182
                }else{
183
                    CHECK_HALF_MV(1, 1, mx-1, my  )
184
                }
185
                CHECK_HALF_MV(1, 0, mx-1, my  )
186
            }else{
187
                CHECK_HALF_MV(1, 1, mx  , my-1)
188
                if(t+l<=b+r){
189
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
190
                }else{
191
                    CHECK_HALF_MV(1, 1, mx  , my  )
192
                }
193
                CHECK_HALF_MV(1, 0, mx  , my  )
194
            }
195
        }else{
196
            if(l<=r){
197
                if(t+l<=b+r){
198
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
199
                }else{
200
                    CHECK_HALF_MV(1, 1, mx  , my  )
201
                }
202
                CHECK_HALF_MV(1, 0, mx-1, my)
203
                CHECK_HALF_MV(1, 1, mx-1, my)
204
            }else{
205
                if(t+r<=b+l){
206
                    CHECK_HALF_MV(1, 1, mx  , my-1)
207
                }else{
208
                    CHECK_HALF_MV(1, 1, mx-1, my)
209
                }
210
                CHECK_HALF_MV(1, 0, mx  , my)
211
                CHECK_HALF_MV(1, 1, mx  , my)
212
            }
213
            CHECK_HALF_MV(0, 1, mx  , my)
214
        }
215
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
216
    }
217

    
218
    *mx_ptr = bx;
219
    *my_ptr = by;
220

    
221
    return dmin;
222
}
223
#endif
224

    
225
/**
 * Sub-pel "refinement" that performs no search at all.
 *
 * Doubles the integer vector in place, which is the same scaling
 * hpel_motion_search() applies when it finds no better half-pel position,
 * and returns dmin unchanged. All other parameters are accepted only so the
 * signature matches the other sub-pel search functions; they are not read.
 *
 * Multiplication is used instead of a left shift because the components may
 * be negative and left-shifting a negative int is undefined behaviour in C.
 */
static int no_sub_motion_search(MpegEncContext * s,
          int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    *mx_ptr *= 2;
    *my_ptr *= 2;
    return dmin;
}
234

    
235
/**
 * Score a motion vector with the macroblock comparison function (mb_cmp).
 *
 * @param s          encoder context; the motion estimation state is s->me
 * @param mx, my     vector in sub-pel units; the integer part is mx>>(qpel+1)
 *                   and the fractional part is mx&mask, with qpel taken from
 *                   the FLAG_QPEL bit of c->mb_flags
 * @param src_index  passed through to cmp()
 * @param ref_index  passed through to cmp()
 * @param size       index into s->dsp.mb_cmp[] (size for luma, size+1 for chroma)
 * @param h          passed through to cmp()
 * @param add_rate   if non-zero, the motion vector penalty is added
 * @return the comparison score, plus the penalty when requested
 */
inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                               int ref_index, int size, int h, int add_rate)
{
    MotionEstContext * const c= &s->me;
    const int flags= c->mb_flags;
    const int qpel= flags & FLAG_QPEL;
    const int frac_mask= 2*qpel + 1;
    const int int_shift= qpel + 1;
    const int penalty_factor= c->mb_penalty_factor;
    me_cmp_func cmp_sub= s->dsp.mb_cmp[size];          //FIXME factorize
    me_cmp_func chroma_cmp_sub= s->dsp.mb_cmp[size+1];
    int d;

    LOAD_COMMON

    d= cmp(s, mx>>int_shift, my>>int_shift, mx&frac_mask, my&frac_mask,
           size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);

    if(!add_rate)
        return d;

    //FIXME check cbp before adding penalty for (0,0) vector
    if(mx || my || size>0)
        d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;

    return d;
}
264

    
265
/*
 * CHECK_QUARTER_MV(dx, dy, x, y): evaluate one quarter-pel candidate.
 * (x, y) is the integer position and (dx, dy) the quarter-pel offset added to
 * 4*x / 4*y. The candidate is scored with cmp_qpel() plus the motion vector
 * penalty, and COPY3_IF_LT() stores it in dmin/bx/by.
 * Relies on these names at the expansion site: s, d, dmin, bx, by, size, h,
 * ref_index, src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y,
 * penalty_factor.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int quarter_x= (dx) + 4*(x);\
    const int quarter_y= (dy) + 4*(y);\
    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += penalty_factor*(mv_penalty[quarter_x - pred_x] + mv_penalty[quarter_y - pred_y]);\
    COPY3_IF_LT(dmin, d, bx, quarter_x, by, quarter_y)\
}
273

    
274
/* Number of estimated quarter-pel candidates kept for real evaluation. */
#define QPEL_N_BEST 8

/*
 * Insert (score, x, y) into the ascending top-QPEL_N_BEST lists best[] and
 * best_pos[]. Entries from the insertion point on are moved down by one and
 * the last one is dropped. Nothing happens if score is not smaller than any
 * stored score.
 */
static inline void qpel_insert_candidate(int *best, int (*best_pos)[2],
                                         int score, int x, int y)
{
    int k;

    for(k=0; k<QPEL_N_BEST; k++){
        if(score < best[k]){
            memmove(best     + k + 1, best     + k, sizeof(best[0])    *(QPEL_N_BEST-1-k));
            memmove(best_pos + k + 1, best_pos + k, sizeof(best_pos[0])*(QPEL_N_BEST-1-k));
            best[k]= score;
            best_pos[k][0]= x;
            best_pos[k][1]= y;
            break;
        }
    }
}

/**
 * Refine an integer motion vector to quarter-pel precision.
 *
 * The scores of the 48 quarter-pel positions within +-3 quarter-pels of the
 * integer vector are first estimated from the integer-position scores in the
 * score map. The estimate uses the 3x3 neighbourhood when c->dia_size >= 2;
 * otherwise it uses the 4 direct neighbours, the centre and one freshly
 * computed top-left score. The QPEL_N_BEST best estimates are kept, and the
 * first me_subpel_quality of them (at most QPEL_N_BEST) are really measured
 * with CHECK_QUARTER_MV().
 *
 * @param s          encoder context; the motion estimation state is s->me
 * @param mx_ptr     in: integer x of the best vector, out: x in quarter-pel units
 * @param my_ptr     in: integer y of the best vector, out: y in quarter-pel units
 * @param dmin       score of the incoming integer vector
 * @param src_index  passed through to cmp() / cmp_qpel()
 * @param ref_index  passed through to cmp() / cmp_qpel()
 * @param size       index into s->dsp.me_cmp[] / me_sub_cmp[] (size+1 for chroma)
 * @param h          passed through to cmp() / cmp_qpel()
 * @return the best score found
 *
 * When c->skip is set the vector is forced to (0,0) and dmin is returned
 * unchanged. When the vector lies on the search window border it is only
 * multiplied by 4.
 */
static int qpel_motion_search(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    MotionEstContext * const c= &s->me;
    const int mx = *mx_ptr;
    const int my = *my_ptr;
    const int penalty_factor= c->sub_penalty_factor;
    const int subpel_quality= c->avctx->me_subpel_quality;
    me_cmp_func cmpf, chroma_cmpf;
    me_cmp_func cmp_sub, chroma_cmp_sub;

    LOAD_COMMON
    int flags= c->sub_flags;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
 //FIXME factorize

    cmp_sub= s->dsp.me_sub_cmp[size];
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];

    if(c->skip){ //FIXME somehow move up (benchmark)
        *mx_ptr = 0;
        *my_ptr = 0;
        return dmin;
    }

    /* re-measure the integer position if sub-pel uses another comparison
       function than the integer search did */
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
        if(mx || my || size>0)
            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
    }

    if (mx > xmin && mx < xmax &&
        my > ymin && my < ymax) {
        int bx=4*mx, by=4*my;
        int d= dmin;
        int i, nx, ny;
        /* best_pos[] has QPEL_N_BEST entries; me_subpel_quality is not range
           checked in this file, so bound the number of candidates here */
        const int n_checks= FFMIN(subpel_quality, QPEL_N_BEST);
        const int index= (my<<ME_MAP_SHIFT) + mx;
        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
        const int ctr= score_map[(index                  )&(ME_MAP_SIZE-1)];
        int best[QPEL_N_BEST];
        int best_pos[QPEL_N_BEST][2];

        /* every byte becomes 64, giving each entry a large positive start
           value that any realistic estimate is smaller than */
        memset(best, 64, sizeof(best));

        if(c->dia_size>=2){
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
                    const int64_t c2= nx*nx*( r +  l - 2*ctr) + 4*nx*( r- l) + 32*ctr;
                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;

                    /* the integer position itself is not a candidate */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    qpel_insert_candidate(best, best_pos, score, nx + 4*mx, ny + 4*my);
                }
            }
        }else{
            //FIXME this could overflow (unlikely though)
            const int cx = 4*(r - l);
            const int cx2= r + l - 2*ctr;
            const int cy = 4*(b - t);
            const int cy2= b + t - 2*ctr;
            /* FIXME the top-left score is always recomputed; reusing the
               score map entry for it was disabled in the original code.
               FIXME wrong if chroma me is different */
            const int tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
            const int cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*ctr;

            /* the fitted polynomial must reproduce the measured scores */
            assert(16*cx2 + 4*cx + 32*ctr == 32*r);
            assert(16*cx2 - 4*cx + 32*ctr == 32*l);
            assert(16*cy2 + 4*cy + 32*ctr == 32*b);
            assert(16*cy2 - 4*cy + 32*ctr == 32*t);
            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*ctr == 32*tl);

            for(ny= -3; ny <= 3; ny++){
                for(nx= -3; nx <= 3; nx++){
                    //FIXME this could overflow (unlikely though)
                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*ctr; //FIXME factor

                    /* the integer position itself is not a candidate */
                    if((nx&3)==0 && (ny&3)==0) continue;

                    /* estimates in this branch are scaled by 32 */
                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;

                    qpel_insert_candidate(best, best_pos, score, nx + 4*mx, ny + 4*my);
                }
            }
        }

        /* really measure the most promising estimated positions */
        for(i=0; i<n_checks; i++){
            nx= best_pos[i][0];
            ny= best_pos[i][1];
            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
        }

        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);

        *mx_ptr = bx;
        *my_ptr = by;
    }else{
        *mx_ptr =4*mx;
        *my_ptr =4*my;
    }

    return dmin;
}
496

    
497

    
498
/*
 * CHECK_MV(x, y): evaluate the integer position (x, y) unless the map already
 * holds it for the current generation.
 * A new position is scored with cmp(), recorded in map[] / score_map[]
 * (score without penalty), and offered to dmin/best[] through COPY3_IF_LT()
 * with the motion vector penalty added. The position must lie inside
 * [xmin,xmax] x [ymin,ymax]; this is only asserted.
 * Relies on these names at the expansion site: s, d, dmin, best, map,
 * score_map, map_generation, shift, size, h, ref_index, src_index, cmpf,
 * chroma_cmpf, flags, mv_penalty, pred_x, pred_y, penalty_factor and the
 * window bounds.
 */
#define CHECK_MV(x,y)\
{\
    const int mv_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    const int mv_key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[mv_slot]!=mv_key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        score_map[mv_slot]= d;\
        map[mv_slot]= mv_key;\
        d += penalty_factor*(mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y]);\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
516

    
517
/*
 * CHECK_CLIPPED_MV(ax, ay): clamp (ax, ay) into [xmin,xmax] x [ymin,ymax]
 * and hand the clamped position to CHECK_MV(). Each argument is evaluated
 * exactly once.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int raw_x= (ax);\
    const int raw_y= (ay);\
    const int clamped_x= FFMAX(xmin, FFMIN(raw_x, xmax));\
    const int clamped_y= FFMAX(ymin, FFMIN(raw_y, ymax));\
    CHECK_MV(clamped_x, clamped_y)\
}
525

    
526
/*
 * CHECK_MV_DIR(x, y, new_dir): like CHECK_MV() but without range assertions,
 * and on improvement it also records new_dir in next_dir.
 * A position not yet in the map for the current generation is scored with
 * cmp(), stored in map[] / score_map[] (score without penalty), and becomes
 * the new best[] / dmin if its penalised score is strictly smaller than dmin.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const int dir_slot= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    const int dir_key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    if(map[dir_slot]!=dir_key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        score_map[dir_slot]= d;\
        map[dir_slot]= dir_key;\
        d += penalty_factor*(mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y]);\
        if(d<dmin){\
            dmin=d;\
            next_dir= (new_dir);\
            best[0]=(x);\
            best[1]=(y);\
        }\
    }\
}
545

    
546
/*
 * check(x, y, S, v): debug helper that prints a diagnostic when (x, y) lies
 * outside the search window scaled by 2^S. `v` is a bare token appended to
 * the message to identify the call site.
 * Needs xmin, xmax, ymin, ymax and s (with mb_x / mb_y) in scope.
 *
 * The four tests are wrapped in one braced block so the macro behaves as a
 * single statement (safe as the body of an if/else). The bounds are scaled
 * by multiplication, since the window minimum may be negative and
 * left-shifting a negative int is undefined behaviour.
 */
#define check(x,y,S,v)\
{\
    if( (x)<(xmin*(1<<(S))) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
    if( (x)>(xmax*(1<<(S))) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
    if( (y)<(ymin*(1<<(S))) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
    if( (y)>(ymax*(1<<(S))) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
}

    
552
/*
 * LOAD_COMMON2: declare the extra locals used by the integer search macros.
 * `map` is c->map, `qpel` is the FLAG_QPEL bit of `flags`, and `shift` is
 * 1+qpel, the amount by which integer coordinates are shifted before they
 * index mv_penalty[].
 * Needs `c` and `flags` in scope.
 *
 * The last line intentionally has no line continuation, so the macro cannot
 * absorb whatever line follows the definition.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;

    
557
/**
 * Integer search that repeatedly steps to the best of the four direct
 * neighbours of the current best position.
 *
 * @param s               encoder context; the motion estimation state is s->me
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to cmp()
 * @param ref_index       passed through to cmp()
 * @param penalty_factor  multiplier for the motion vector penalty
 * @param size            index into s->dsp.me_cmp[] (size+1 for chroma)
 * @param h               passed through to cmp()
 * @param flags           comparison flags; the FLAG_QPEL bit selects the penalty shift
 * @return the best score found
 */
static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int next_dir=-1;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    /* The half/quarter-pel refinement reads the score of the best point from
       the map, so make sure it is there. This is needed only very rarely. */
    {
        const int start_slot= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
        const int start_key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;

        if(map[start_slot]!=start_key){
            score_map[start_slot]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
            map[start_slot]= start_key;
        }
    }

    do{
        int d;
        const int came_from= next_dir; /* direction of the previous step, -1 at the start */
        const int cx= best[0];
        const int cy= best[1];

        next_dir=-1;

        /* the direction opposite to the last step points back to the position
           just left, so that neighbour is skipped */
        if(came_from!=2 && cx>xmin) CHECK_MV_DIR(cx-1, cy  , 0)
        if(came_from!=3 && cy>ymin) CHECK_MV_DIR(cx  , cy-1, 1)
        if(came_from!=0 && cx<xmax) CHECK_MV_DIR(cx+1, cy  , 2)
        if(came_from!=1 && cy<ymax) CHECK_MV_DIR(cx  , cy+1, 3)
    }while(next_dir!=-1); /* stop as soon as no neighbour improved the score */

    return dmin;
}
598

    
599
/**
 * Integer search over diamonds of radius 1, 2 and 4 around the best
 * position, restarting from radius 1 whenever a better position is found.
 *
 * @param s               encoder context; the motion estimation state is s->me
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to cmp()
 * @param ref_index       passed through to cmp()
 * @param penalty_factor  multiplier for the motion vector penalty
 * @param size            index into s->dsp.me_cmp[] (size+1 for chroma)
 * @param h               passed through to cmp()
 * @param flags           comparison flags; the FLAG_QPEL bit selects the penalty shift
 * @return the best score found
 */
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    dia_size= 1;
    while(dia_size<=4){
        const int cx= best[0];
        const int cy= best[1];
        /* only power-of-two radii are searched (3 is skipped) */
        const int is_pow2= (dia_size&(dia_size-1)) == 0;
        /* the whole diamond has to fit into the search window */
        const int fits=    cx + dia_size <= xmax
                        && cx - dia_size >= xmin
                        && cy + dia_size <= ymax
                        && cy - dia_size >= ymin;

        if(is_pow2 && fits){
            int dir;

            /* every second point on each of the four diamond edges */
            for(dir= 0; dir<dia_size; dir+=2){
                int d;

                CHECK_MV(cx + dir           , cy + dia_size - dir);
                CHECK_MV(cx + dia_size - dir, cy - dir           );
                CHECK_MV(cx - dir           , cy - dia_size + dir);
                CHECK_MV(cx - dia_size + dir, cy + dir           );
            }

            /* the best position moved: start over with the smallest diamond
               (the increment below makes the radius 1 again) */
            if(cx!=best[0] || cy!=best[1])
                dia_size= 0;
        }
        dia_size++;
    }
    return dmin;
}
640

    
641
/**
 * Integer hexagon search with a shrinking radius.
 *
 * For each radius the six (four when the radius is 1) hexagon points around
 * the best position are tested until the best position stops moving. The
 * radius is then halved if the initial dia_size was a power of two, and
 * decremented otherwise, until it reaches 0.
 *
 * @param s               encoder context; the motion estimation state is s->me
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to cmp()
 * @param ref_index       passed through to cmp()
 * @param penalty_factor  multiplier for the motion vector penalty
 * @param size            index into s->dsp.me_cmp[] (size+1 for chroma)
 * @param h               passed through to cmp()
 * @param flags           comparison flags; the FLAG_QPEL bit selects the penalty shift
 * @param dia_size        initial hexagon radius
 * @return the best score found
 */
static int hex_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags, int dia_size)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int cx, cy, d;
    /* non-zero when the initial radius is not a power of two */
    const int dec= dia_size & (dia_size-1);

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    while(dia_size){
        do{
            cx= best[0];
            cy= best[1];

            CHECK_CLIPPED_MV(cx  -dia_size    , cy);
            CHECK_CLIPPED_MV(cx+  dia_size    , cy);
            CHECK_CLIPPED_MV(cx+( dia_size>>1), cy+dia_size);
            CHECK_CLIPPED_MV(cx+( dia_size>>1), cy-dia_size);
            if(dia_size>1){
                /* note: this is (-dia_size)>>1, a right shift of a negative
                   value, which is not the same as -(dia_size>>1) for odd radii */
                CHECK_CLIPPED_MV(cx+(-dia_size>>1), cy+dia_size);
                CHECK_CLIPPED_MV(cx+(-dia_size>>1), cy-dia_size);
            }
        }while(best[0] != cx || best[1] != cy);

        if(dec)
            dia_size--;
        else
            dia_size>>= 1;
    }

    return dmin;
}
674

    
675
/**
 * Integer search over an 8-point ring scaled by a shrinking radius, finished
 * with one pass over the four direct neighbours.
 *
 * The initial radius is the low 8 bits of c->dia_size. It is halved after
 * each round if it was a power of two, and decremented otherwise.
 *
 * @param s               encoder context; the motion estimation state is s->me
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to cmp()
 * @param ref_index       passed through to cmp()
 * @param penalty_factor  multiplier for the motion vector penalty
 * @param size            index into s->dsp.me_cmp[] (size+1 for chroma)
 * @param h               passed through to cmp()
 * @param flags           comparison flags; the FLAG_QPEL bit selects the penalty shift
 * @return the best score found
 */
static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int cx, cy, i, d;
    int dia_size= c->dia_size&0xFF;
    /* non-zero when the initial radius is not a power of two */
    const int dec= dia_size & (dia_size-1);
    /* unit ring; every offset is multiplied by the current radius */
    static const int ring[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
                                 { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    while(dia_size){
        /* repeat at this radius until the best position stops moving */
        do{
            cx= best[0];
            cy= best[1];
            for(i=0; i<8; i++){
                CHECK_CLIPPED_MV(cx+ring[i][0]*dia_size, cy+ring[i][1]*dia_size);
            }
        }while(best[0] != cx || best[1] != cy);

        if(dec)
            dia_size--;
        else
            dia_size>>= 1;
    }

    /* final pass over the four direct neighbours of the result */
    cx= best[0];
    cy= best[1];
    CHECK_CLIPPED_MV(cx+1, cy);
    CHECK_CLIPPED_MV(cx, cy+1);
    CHECK_CLIPPED_MV(cx-1, cy);
    CHECK_CLIPPED_MV(cx, cy-1);

    return dmin;
}
712

    
713
/**
 * Multi-stage integer search. It runs a sparse cross, then a 5x5 block around
 * the best position, then a 16-point pattern at growing scales, and ends with
 * hex_search() at radius 2.
 *
 * The extent of the cross and the number of pattern scales come from
 * c->dia_size with its lowest bit cleared.
 *
 * @param s               encoder context; the motion estimation state is s->me
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to cmp()
 * @param ref_index       passed through to cmp()
 * @param penalty_factor  multiplier for the motion vector penalty
 * @param size            index into s->dsp.me_cmp[] (size+1 for chroma)
 * @param h               passed through to cmp()
 * @param flags           comparison flags; the FLAG_QPEL bit selects the penalty shift
 * @return the best score found (the return value of hex_search())
 */
static int umh_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int cx, cy, x2, y2, x_end, y_end, i, j, d;
    const int dia_size= c->dia_size&0xFE;
    static const int pattern[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
                                     { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                                     {-2, 3}, { 0, 4}, { 2, 3},
                                     {-2,-3}, { 0,-4}, { 2,-3},};

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    /* stage 1: every second position on a cross through the start point; the
       vertical arm is half as long as the horizontal one */
    cx= best[0];
    cy= best[1];
    x_end= FFMIN(cx+dia_size-1, xmax);
    for(x2=FFMAX(cx-dia_size+1, xmin); x2<=x_end; x2+=2){
        CHECK_MV(x2, cy);
    }
    y_end= FFMIN(cy+dia_size/2-1, ymax);
    for(y2=FFMAX(cy-dia_size/2+1, ymin); y2<=y_end; y2+=2){
        CHECK_MV(cx, y2);
    }

    /* stage 2: full 5x5 block around the best position so far */
    cx= best[0];
    cy= best[1];
    y_end= FFMIN(cy+2, ymax);
    x_end= FFMIN(cx+2, xmax);
    for(y2=FFMAX(cy-2, ymin); y2<=y_end; y2++){
        for(x2=FFMAX(cx-2, xmin); x2<=x_end; x2++){
            CHECK_MV(x2, y2);
        }
    }

//FIXME prevent the CLIP stuff

    /* stage 3: 16-point pattern at scales 1..dia_size/4, still centred on the
       position that stage 2 started from */
    for(j=1; j<=dia_size/4; j++){
        for(i=0; i<16; i++){
            CHECK_CLIPPED_MV(cx+pattern[i][0]*j, cy+pattern[i][1]*j);
        }
    }

    /* stage 4: hexagon refinement */
    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}
759

    
760
/**
 * Exhaustive integer search of the square [-dia_size, dia_size]^2 clipped to
 * the search window, where dia_size is the low 8 bits of c->dia_size.
 *
 * After the scan, the best position and its four direct neighbours are run
 * through CHECK_CLIPPED_MV(), and best[] is then set back to the position
 * found by the scan.
 *
 * @param s               encoder context; the motion estimation state is s->me
 * @param best            in/out: best[0] = x, best[1] = y of the best vector
 * @param dmin            score of the incoming best vector
 * @param src_index       passed through to cmp()
 * @param ref_index       passed through to cmp()
 * @param penalty_factor  multiplier for the motion vector penalty
 * @param size            index into s->dsp.me_cmp[] (size+1 for chroma)
 * @param h               passed through to cmp()
 * @param flags           comparison flags; the FLAG_QPEL bit selects the penalty shift
 * @return the value of d after the final checks (see the note in the body)
 */
static int full_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int sx, sy, bx0, by0, d;
    const int dia_size= c->dia_size&0xFF;
    const int x_first= FFMAX(-dia_size, xmin);
    const int x_last = FFMIN( dia_size, xmax);
    const int y_first= FFMAX(-dia_size, ymin);
    const int y_last = FFMIN( dia_size, ymax);

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(sy=y_first; sy<=y_last; sy++){
        for(sx=x_first; sx<=x_last; sx++){
            CHECK_MV(sx, sy);
        }
    }

    bx0= best[0];
    by0= best[1];
    d= dmin;
    /* presumably this makes sure the best point and its neighbours are in the
       map for the later sub-pel refinement -- TODO confirm */
    CHECK_CLIPPED_MV(bx0  , by0);
    CHECK_CLIPPED_MV(bx0+1, by0);
    CHECK_CLIPPED_MV(bx0, by0+1);
    CHECK_CLIPPED_MV(bx0-1, by0);
    CHECK_CLIPPED_MV(bx0, by0-1);
    /* undo any change the checks above made to best[] */
    best[0]= bx0;
    best[1]= by0;

    /* NOTE(review): d is overwritten whenever one of the checks above has to
       evaluate a position that was not in the map yet, so the value returned
       here is not necessarily the score of (best[0], best[1]) -- confirm
       whether that is intended. */
    return d;
}
794

    
795
/*
 * SAB_CHECK_MV(ax, ay): evaluate the integer position (ax, ay) unless the map
 * already holds it for the current generation.
 * A new position is scored with cmp() and recorded in map[] / score_map[]
 * (score without penalty). If its penalised score is smaller than the last
 * entry of minima[], it is inserted into minima[] (ascending by height, last
 * entry dropped); then i is set to -1 and `continue` is executed.
 * The macro must therefore be expanded inside a loop that uses `i` as its
 * counter. It also needs minima, minima_count, d and the usual search locals
 * in scope.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const int sab_key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int sab_slot= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[sab_slot]!=sab_key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        score_map[sab_slot]= d;\
        map[sab_slot]= sab_key;\
        d += penalty_factor*(mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y]);\
        if(d < minima[minima_count-1].height){\
            int ins;\
            \
            for(ins=0; d >= minima[ins].height; ins++)\
                ;\
\
            memmove(&minima [ins+1], &minima [ins], (minima_count - ins - 1)*sizeof(Minima));\
\
            minima[ins].x= ax;\
            minima[ins].y= ay;\
            minima[ins].height= d;\
            minima[ins].checked= 0;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
823

    
824
#define MAX_SAB_SIZE ME_MAP_SIZE
825
static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
826
                                       int src_index, int ref_index, int const penalty_factor,
827
                                       int size, int h, int flags)
828
{
829
    MotionEstContext * const c= &s->me;
830
    me_cmp_func cmpf, chroma_cmpf;
831
    Minima minima[MAX_SAB_SIZE];
832
    const int minima_count= FFABS(c->dia_size);
833
    int i, j;
834
    LOAD_COMMON
835
    LOAD_COMMON2
836
    int map_generation= c->map_generation;
837

    
838
    cmpf= s->dsp.me_cmp[size];
839
    chroma_cmpf= s->dsp.me_cmp[size+1];
840

    
841
    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
842
      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
843
     */
844
    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
845
        uint32_t key= map[i];
846

    
847
        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
848

    
849
        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
850

    
851
        minima[j].height= score_map[i];
852
        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
853
        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
854
        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
855
        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
856

    
857
        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
858
        if(   minima[j].x > xmax || minima[j].x < xmin
859
           || minima[j].y > ymax || minima[j].y < ymin)
860
            continue;
861

    
862
        minima[j].checked=0;
863
        if(minima[j].x || minima[j].y)
864
            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
865

    
866
        j++;
867
    }
868

    
869
    qsort(minima, j, sizeof(Minima), minima_cmp);
870

    
871
    for(; j<minima_count; j++){
872
        minima[j].height=256*256*256*64;
873
        minima[j].checked=0;
874
        minima[j].x= minima[j].y=0;
875
    }
876

    
877
    for(i=0; i<minima_count; i++){
878
        const int x= minima[i].x;
879
        const int y= minima[i].y;
880
        int d;
881

    
882
        if(minima[i].checked) continue;
883

    
884
        if(   x >= xmax || x <= xmin
885
           || y >= ymax || y <= ymin)
886
           continue;
887

    
888
        SAB_CHECK_MV(x-1, y)
889
        SAB_CHECK_MV(x+1, y)
890
        SAB_CHECK_MV(x  , y-1)
891
        SAB_CHECK_MV(x  , y+1)
892

    
893
        minima[i].checked= 1;
894
    }
895

    
896
    best[0]= minima[0].x;
897
    best[1]= minima[0].y;
898
    dmin= minima[0].height;
899

    
900
    if(   best[0] < xmax && best[0] > xmin
901
       && best[1] < ymax && best[1] > ymin){
902
        int d;
903
        //ensure that the refernece samples for hpel refinement are in the map
904
        CHECK_MV(best[0]-1, best[1])
905
        CHECK_MV(best[0]+1, best[1])
906
        CHECK_MV(best[0], best[1]-1)
907
        CHECK_MV(best[0], best[1]+1)
908
    }
909
    return dmin;
910
}
911

    
912
/**
 * Diamond search with a growing radius.
 *
 * For dia_size = 1 .. c->dia_size the four edges of the diamond of that
 * radius around the current best vector are tested with CHECK_MV, each edge
 * clipped to [xmin,xmax] x [ymin,ymax]. As soon as a pass has changed best[]
 * the radius starts again at 1 around the new position; the search ends once
 * the largest radius has been tried without any change.
 *
 * @param best in: starting vector, out: vector left in place by CHECK_MV
 * @param dmin score belonging to best[] on entry
 * @return dmin as left by the CHECK_MV calls
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size= 1;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf       = s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    while(dia_size <= c->dia_size){
        const int x= best[0];
        const int y= best[1];
        int k, k_end;
        int d;

        /* edge from (x, y+dia_size) towards (x+dia_size, y) */
        k_end= FFMIN(dia_size, xmax - x + 1);
        for(k= FFMAX(0, y + dia_size - ymax); k<k_end; k++){
            CHECK_MV(x + k, y + dia_size - k);
        }

        /* edge from (x+dia_size, y) towards (x, y-dia_size) */
        k_end= FFMIN(dia_size, y - ymin + 1);
        for(k= FFMAX(0, x + dia_size - xmax); k<k_end; k++){
            CHECK_MV(x + dia_size - k, y - k);
        }

        /* edge from (x, y-dia_size) towards (x-dia_size, y) */
        k_end= FFMIN(dia_size, x - xmin + 1);
        for(k= FFMAX(0, -y + dia_size + ymin); k<k_end; k++){
            CHECK_MV(x - k, y - dia_size + k);
        }

        /* edge from (x-dia_size, y) towards (x, y+dia_size) */
        k_end= FFMIN(dia_size, ymax - y + 1);
        for(k= FFMAX(0, -x + dia_size + xmin); k<k_end; k++){
            CHECK_MV(x - dia_size + k, y + k);
        }

        /* a moved best vector restarts with the smallest diamond */
        if(x==best[0] && y==best[1])
            dia_size++;
        else
            dia_size= 1;
    }
    return dmin;
}
972

    
973
/**
 * Dispatch to one of the integer-pel search routines according to
 * c->dia_size:
 *   -1          funny_diamond_search
 *   below -1    sab_diamond_search
 *   0 and 1     small_diamond_search
 *   2..256      var_diamond_search
 *   257..512    l2s_dia_search
 *   513..768    hex_search (with the low 8 bits of dia_size as extra argument)
 *   769..1024   umh_search
 *   above 1024  full_search
 * All arguments are forwarded unchanged and the callee's result is returned.
 */
static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags){
    MotionEstContext * const c= &s->me;
    const int dia= c->dia_size;

    /* negative selectors first, exactly in the order of the original chain */
    if(dia == -1)
        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia < -1)
        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia < 2)
        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    /* from here on dia >= 2: test the thresholds from the largest downwards */
    if(dia > 1024)
        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia > 768)
        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia > 512)
        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, dia&0xFF);
    if(dia > 256)
        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
994

    
995
/*!
996
   \param P[10][2] a list of candidate mvs to check before starting the
997
   iterative search. If one of the candidates is close to the optimal mv, then
998
   it takes fewer iterations. And it increases the chance that we find the
999
   optimal mv.
1000
 */
1001
static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1002
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1003
                             int ref_mv_scale, int flags, int size, int h)
1004
{
1005
    MotionEstContext * const c= &s->me;
1006
    int best[2]={0, 0};      /*!< x and y coordinates of the best motion vector.
1007
                               i.e. the difference between the position of the
1008
                               block currently being encoded and the position of
1009
                               the block chosen to predict it from. */
1010
    int d;                   ///< the score (cmp + penalty) of any given mv
1011
    int dmin;                /*!< the best value of d, i.e. the score
1012
                               corresponding to the mv stored in best[]. */
1013
    int map_generation;
1014
    int penalty_factor;
1015
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
1016
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
1017
    me_cmp_func cmpf, chroma_cmpf;
1018

    
1019
    LOAD_COMMON
1020
    LOAD_COMMON2
1021

    
1022
    if(c->pre_pass){
1023
        penalty_factor= c->pre_penalty_factor;
1024
        cmpf= s->dsp.me_pre_cmp[size];
1025
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
1026
    }else{
1027
        penalty_factor= c->penalty_factor;
1028
        cmpf= s->dsp.me_cmp[size];
1029
        chroma_cmpf= s->dsp.me_cmp[size+1];
1030
    }
1031

    
1032
    map_generation= update_map_generation(c);
1033

    
1034
    assert(cmpf);
1035
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
1036
    map[0]= map_generation;
1037
    score_map[0]= dmin;
1038

    
1039
    //FIXME precalc first term below?
1040
    if((s->pict_type == AV_PICTURE_TYPE_B && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
1041
        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
1042

    
1043
    /* first line */
1044
    if (s->first_slice_line) {
1045
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1046
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1047
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1048
    }else{
1049
        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
1050
                    && ( P_LEFT[0]    |P_LEFT[1]
1051
                        |P_TOP[0]     |P_TOP[1]
1052
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
1053
            *mx_ptr= 0;
1054
            *my_ptr= 0;
1055
            c->skip=1;
1056
            return dmin;
1057
        }
1058
        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
1059
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
1060
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
1061
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
1062
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
1063
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1064
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1065
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
1066
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
1067
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1068
    }
1069
    if(dmin>h*h*4){
1070
        if(c->pre_pass){
1071
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
1072
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
1073
            if(!s->first_slice_line)
1074
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1075
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1076
        }else{
1077
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1078
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1079
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1080
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1081
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1082
        }
1083
    }
1084

    
1085
    if(c->avctx->last_predictor_count){
1086
        const int count= c->avctx->last_predictor_count;
1087
        const int xstart= FFMAX(0, s->mb_x - count);
1088
        const int ystart= FFMAX(0, s->mb_y - count);
1089
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
1090
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
1091
        int mb_y;
1092

    
1093
        for(mb_y=ystart; mb_y<yend; mb_y++){
1094
            int mb_x;
1095
            for(mb_x=xstart; mb_x<xend; mb_x++){
1096
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
1097
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
1098
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
1099

    
1100
                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
1101
                CHECK_MV(mx,my)
1102
            }
1103
        }
1104
    }
1105

    
1106
//check(best[0],best[1],0, b0)
1107
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1108

    
1109
//check(best[0],best[1],0, b1)
1110
    *mx_ptr= best[0];
1111
    *my_ptr= best[1];
1112

    
1113
//    printf("%d %d %d \n", best[0], best[1], dmin);
1114
    return dmin;
1115
}
1116

    
1117
//this function is dedicated to the braindamaged gcc
1118
inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1119
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1120
                             int ref_mv_scale, int size, int h)
1121
{
1122
    MotionEstContext * const c= &s->me;
1123
//FIXME convert other functions in the same way if faster
1124
    if(c->flags==0 && h==16 && size==0){
1125
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
1126
//    case FLAG_QPEL:
1127
//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
1128
    }else{
1129
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
1130
    }
1131
}
1132

    
1133
/* last_mv[xy][comp] multiplied by ref_mv_scale, then (v + 2^15) >> 16;
   only valid inside the function below, undefined again right after it */
#define EPZS_SCALED_LAST_MV(xy, comp) ((last_mv[(xy)][(comp)]*ref_mv_scale + (1<<15))>>16)

/**
 * Predictor based search with the fixed parameters size=1 and h=8.
 *
 * Unlike epzs_motion_search_internal() the zero vector is not scored up
 * front: dmin starts at 1000000 and only the predictors from P and last_mv
 * are tested before diamond_search() refines the best one.
 *
 * @param mx_ptr,my_ptr receive the x and y component of the chosen vector
 * @param P             candidate vectors (P_MV1, P_LEFT, P_TOP, ...)
 * @param last_mv       vectors indexed by macroblock position, scaled with
 *                      ref_mv_scale before use
 * @return the score of the chosen vector
 */
static int epzs_motion_search4(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
                             int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale)
{
    MotionEstContext * const c= &s->me;
    int best[2]={0, 0};
    int d, dmin;
    int map_generation;
    const int penalty_factor= c->penalty_factor;
    const int size=1;
    const int h=8;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
    me_cmp_func cmpf, chroma_cmpf;
    LOAD_COMMON
    int flags= c->flags;
    LOAD_COMMON2

    cmpf       = s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    map_generation= update_map_generation(c);

    /* start with a score that any candidate scoring below 1000000 beats */
    dmin = 1000000;

    if(!s->first_slice_line){
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        CHECK_CLIPPED_MV(EPZS_SCALED_LAST_MV(ref_mv_xy, 0),
                         EPZS_SCALED_LAST_MV(ref_mv_xy, 1))
    }else{
        /* first line: the median/top/top-right predictors are not tested */
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV(EPZS_SCALED_LAST_MV(ref_mv_xy, 0),
                         EPZS_SCALED_LAST_MV(ref_mv_xy, 1))
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }

    /* still a poor match: try last_mv of the next and the lower macroblock */
    if(dmin>64*4){
        CHECK_CLIPPED_MV(EPZS_SCALED_LAST_MV(ref_mv_xy+1, 0),
                         EPZS_SCALED_LAST_MV(ref_mv_xy+1, 1))
        if(s->mb_y+1<s->end_mb_y){  //FIXME replace at least with last_slice_line
            CHECK_CLIPPED_MV(EPZS_SCALED_LAST_MV(ref_mv_xy+ref_mv_stride, 0),
                             EPZS_SCALED_LAST_MV(ref_mv_xy+ref_mv_stride, 1))
        }
    }

    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}
#undef EPZS_SCALED_LAST_MV
1191

    
1192
//try to merge with above FIXME (needs PSNR test)
1193
static int epzs_motion_search2(MpegEncContext * s,
1194
                             int *mx_ptr, int *my_ptr, int P[10][2],
1195
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1196
                             int ref_mv_scale)
1197
{
1198
    MotionEstContext * const c= &s->me;
1199
    int best[2]={0, 0};
1200
    int d, dmin;
1201
    int map_generation;
1202
    const int penalty_factor= c->penalty_factor;
1203
    const int size=0; //FIXME pass as arg
1204
    const int h=8;
1205
    const int ref_mv_stride= s->mb_stride;
1206
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1207
    me_cmp_func cmpf, chroma_cmpf;
1208
    LOAD_COMMON
1209
    int flags= c->flags;
1210
    LOAD_COMMON2
1211

    
1212
    cmpf= s->dsp.me_cmp[size];
1213
    chroma_cmpf= s->dsp.me_cmp[size+1];
1214

    
1215
    map_generation= update_map_generation(c);
1216

    
1217
    dmin = 1000000;
1218
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1219
    /* first line */
1220
    if (s->first_slice_line) {
1221
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1222
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1223
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1224
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1225
    }else{
1226
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1227
        //FIXME try some early stop
1228
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1229
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1230
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1231
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1232
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1233
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1234
    }
1235
    if(dmin>64*4){
1236
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1237
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1238
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1239
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1240
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1241
    }
1242

    
1243
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1244

    
1245
    *mx_ptr= best[0];
1246
    *my_ptr= best[1];
1247

    
1248
//    printf("%d %d %d \n", best[0], best[1], dmin);
1249
    return dmin;
1250
}