Statistics
| Branch: | Revision:

ffmpeg / libavcodec / motion_est_template.c @ 2912e87a

History | View | Annotate | Download (44.5 KB)

1
/*
2
 * Motion estimation
3
 * Copyright (c) 2002-2004 Michael Niedermayer
4
 *
5
 * This file is part of Libav.
6
 *
7
 * Libav is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * Libav is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with Libav; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file
24
 * Motion estimation template.
25
 */
26

    
27
//Let us hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
28
/*
 * Declare the locals shared by the search routines in this file.
 * Expects a pointer named `c` in scope that provides score_map, xmin, ymin,
 * xmax, ymax, current_mv_penalty, pred_x and pred_y.
 * The expansion ends with a ';' so it is used without a trailing semicolon.
 * The last line deliberately carries no line continuation, so the line that
 * follows the macro definition is never spliced into it.
 */
#define LOAD_COMMON\
    uint32_t av_unused * const score_map= c->score_map;\
    const int av_unused xmin= c->xmin;\
    const int av_unused ymin= c->ymin;\
    const int av_unused xmax= c->xmax;\
    const int av_unused ymax= c->ymax;\
    uint8_t *mv_penalty= c->current_mv_penalty;\
    const int pred_x= c->pred_x;\
    const int pred_y= c->pred_y;
37

    
38
/*
 * Score one half-pel candidate: full-pel position (x, y) plus the half-pel
 * offset (dx, dy). The score from cmp_hpel() plus the vector penalty is left
 * in `d`; if it beats `dmin`, then dmin/bx/by are updated through COPY3_IF_LT.
 * Relies on d, dmin, bx, by, s, size, h, ref_index, src_index, cmp_sub,
 * chroma_cmp_sub, flags, mv_penalty, pred_x, pred_y and penalty_factor being
 * in scope at the point of use. Used without a trailing semicolon.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int half_x= 2*(x)+(dx);\
    const int half_y= 2*(y)+(dy);\
    d= cmp_hpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
    d += (mv_penalty[half_x - pred_x] + mv_penalty[half_y - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, half_x, by, half_y)\
}
46

    
47
#if 0
48
static int hpel_motion_search)(MpegEncContext * s,
49
                                  int *mx_ptr, int *my_ptr, int dmin,
50
                                  uint8_t *ref_data[3],
51
                                  int size)
52
{
53
    const int xx = 16 * s->mb_x + 8*(n&1);
54
    const int yy = 16 * s->mb_y + 8*(n>>1);
55
    const int mx = *mx_ptr;
56
    const int my = *my_ptr;
57
    const int penalty_factor= c->sub_penalty_factor;
58

59
    LOAD_COMMON
60

61
 //   INIT;
62
 //FIXME factorize
63
    me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
64

65
    if(s->no_rounding /*FIXME b_type*/){
66
        hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];
67
        chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
68
    }else{
69
        hpel_put=& s->dsp.put_pixels_tab[size];
70
        chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
71
    }
72
    cmpf= s->dsp.me_cmp[size];
73
    chroma_cmpf= s->dsp.me_cmp[size+1];
74
    cmp_sub= s->dsp.me_sub_cmp[size];
75
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
76

77
    if(c->skip){ //FIXME somehow move up (benchmark)
78
        *mx_ptr = 0;
79
        *my_ptr = 0;
80
        return dmin;
81
    }
82

83
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
84
        CMP_HPEL(dmin, 0, 0, mx, my, size);
85
        if(mx || my)
86
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
87
    }
88

89
    if (mx > xmin && mx < xmax &&
90
        my > ymin && my < ymax) {
91
        int bx=2*mx, by=2*my;
92
        int d= dmin;
93

94
        CHECK_HALF_MV(1, 1, mx-1, my-1)
95
        CHECK_HALF_MV(0, 1, mx  , my-1)
96
        CHECK_HALF_MV(1, 1, mx  , my-1)
97
        CHECK_HALF_MV(1, 0, mx-1, my  )
98
        CHECK_HALF_MV(1, 0, mx  , my  )
99
        CHECK_HALF_MV(1, 1, mx-1, my  )
100
        CHECK_HALF_MV(0, 1, mx  , my  )
101
        CHECK_HALF_MV(1, 1, mx  , my  )
102

103
        assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
104

105
        *mx_ptr = bx;
106
        *my_ptr = by;
107
    }else{
108
        *mx_ptr =2*mx;
109
        *my_ptr =2*my;
110
    }
111

112
    return dmin;
113
}
114

115
#else
116
static int hpel_motion_search(MpegEncContext * s,
117
                                  int *mx_ptr, int *my_ptr, int dmin,
118
                                  int src_index, int ref_index,
119
                                  int size, int h)
120
{
121
    MotionEstContext * const c= &s->me;
122
    const int mx = *mx_ptr;
123
    const int my = *my_ptr;
124
    const int penalty_factor= c->sub_penalty_factor;
125
    me_cmp_func cmp_sub, chroma_cmp_sub;
126
    int bx=2*mx, by=2*my;
127

    
128
    LOAD_COMMON
129
    int flags= c->sub_flags;
130

    
131
 //FIXME factorize
132

    
133
    cmp_sub= s->dsp.me_sub_cmp[size];
134
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
135

    
136
    if(c->skip){ //FIXME move out of hpel?
137
        *mx_ptr = 0;
138
        *my_ptr = 0;
139
        return dmin;
140
    }
141

    
142
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
143
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
144
        if(mx || my || size>0)
145
            dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
146
    }
147

    
148
    if (mx > xmin && mx < xmax &&
149
        my > ymin && my < ymax) {
150
        int d= dmin;
151
        const int index= (my<<ME_MAP_SHIFT) + mx;
152
        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
153
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
154
        const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
155
                     + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
156
        const int r= score_map[(index+ 1               )&(ME_MAP_SIZE-1)]
157
                     + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
158
        const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
159
                     + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
160

    
161
#if 1
162
        int key;
163
        int map_generation= c->map_generation;
164
#ifndef NDEBUG
165
        uint32_t *map= c->map;
166
#endif
167
        key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
168
        assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
169
        key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation;
170
        assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key);
171
        key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation;
172
        assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
173
        key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
174
        assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
175
#endif
176
        if(t<=b){
177
            CHECK_HALF_MV(0, 1, mx  ,my-1)
178
            if(l<=r){
179
                CHECK_HALF_MV(1, 1, mx-1, my-1)
180
                if(t+r<=b+l){
181
                    CHECK_HALF_MV(1, 1, mx  , my-1)
182
                }else{
183
                    CHECK_HALF_MV(1, 1, mx-1, my  )
184
                }
185
                CHECK_HALF_MV(1, 0, mx-1, my  )
186
            }else{
187
                CHECK_HALF_MV(1, 1, mx  , my-1)
188
                if(t+l<=b+r){
189
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
190
                }else{
191
                    CHECK_HALF_MV(1, 1, mx  , my  )
192
                }
193
                CHECK_HALF_MV(1, 0, mx  , my  )
194
            }
195
        }else{
196
            if(l<=r){
197
                if(t+l<=b+r){
198
                    CHECK_HALF_MV(1, 1, mx-1, my-1)
199
                }else{
200
                    CHECK_HALF_MV(1, 1, mx  , my  )
201
                }
202
                CHECK_HALF_MV(1, 0, mx-1, my)
203
                CHECK_HALF_MV(1, 1, mx-1, my)
204
            }else{
205
                if(t+r<=b+l){
206
                    CHECK_HALF_MV(1, 1, mx  , my-1)
207
                }else{
208
                    CHECK_HALF_MV(1, 1, mx-1, my)
209
                }
210
                CHECK_HALF_MV(1, 0, mx  , my)
211
                CHECK_HALF_MV(1, 1, mx  , my)
212
            }
213
            CHECK_HALF_MV(0, 1, mx  , my)
214
        }
215
        assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2);
216
    }
217

    
218
    *mx_ptr = bx;
219
    *my_ptr = by;
220

    
221
    return dmin;
222
}
223
#endif
224

    
225
/**
 * Sub-pel "refinement" that performs no search: it only converts the
 * full-pel vector to half-pel units by doubling it.
 *
 * Multiplication is used instead of a left shift because the components may
 * be negative, and left-shifting a negative signed value is undefined in C.
 *
 * @param s          unused
 * @param mx_ptr     in: full-pel x, out: 2 * x
 * @param my_ptr     in: full-pel y, out: 2 * y
 * @param dmin       score of the incoming vector
 * @param src_index  unused
 * @param ref_index  unused
 * @param size       unused
 * @param h          unused
 * @return dmin, unchanged
 */
static int no_sub_motion_search(MpegEncContext * s,
          int *mx_ptr, int *my_ptr, int dmin,
                                  int src_index, int ref_index,
                                  int size, int h)
{
    (*mx_ptr) *= 2;
    (*my_ptr) *= 2;
    return dmin;
}
234

    
235
/**
 * Score a sub-pel motion vector with the macroblock comparison functions
 * (s->dsp.mb_cmp[size] for luma, [size+1] for chroma).
 *
 * @param mx, my    vector in sub-pel units; the fractional part is the low
 *                  1 (or, with FLAG_QPEL set in c->mb_flags, low 2) bits
 * @param add_rate  when non-zero, the vector penalty scaled by
 *                  c->mb_penalty_factor is added, except for a (0,0) vector
 *                  with size == 0
 * @return the comparison score, optionally including the vector penalty
 */
inline int ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                               int ref_index, int size, int h, int add_rate)
{
    MotionEstContext * const c= &s->me;
    const int flags= c->mb_flags;
    const int qpel= flags & FLAG_QPEL;
    const int frac_bits= qpel+1;          /* bits holding the sub-pel fraction */
    const int frac_mask= 1+2*qpel;
    me_cmp_func cmp_sub= s->dsp.mb_cmp[size];
    me_cmp_func chroma_cmp_sub= s->dsp.mb_cmp[size+1];
    int score;

    LOAD_COMMON

    score= cmp(s, mx>>frac_bits, my>>frac_bits, mx&frac_mask, my&frac_mask,
               size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);

    if(!add_rate)
        return score;

    //FIXME check cbp before adding penalty for (0,0) vector
    if(!(mx || my || size>0))
        return score;

    return score + (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*c->mb_penalty_factor;
}
264

    
265
/*
 * Score one quarter-pel candidate: full-pel position (x, y) plus the
 * quarter-pel offset (dx, dy). The score from cmp_qpel() plus the vector
 * penalty is left in `d`; if it beats `dmin`, then dmin/bx/by are updated
 * through COPY3_IF_LT. Relies on d, dmin, bx, by, s, size, h, ref_index,
 * src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y and
 * penalty_factor being in scope. Used without a trailing semicolon.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int quarter_x= 4*(x)+(dx);\
    const int quarter_y= 4*(y)+(dy);\
    d= cmp_qpel(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
    d += (mv_penalty[quarter_x - pred_x] + mv_penalty[quarter_y - pred_y])*penalty_factor;\
    COPY3_IF_LT(dmin, d, bx, quarter_x, by, quarter_y)\
}
273

    
274
static int qpel_motion_search(MpegEncContext * s,
275
                                  int *mx_ptr, int *my_ptr, int dmin,
276
                                  int src_index, int ref_index,
277
                                  int size, int h)
278
{
279
    MotionEstContext * const c= &s->me;
280
    const int mx = *mx_ptr;
281
    const int my = *my_ptr;
282
    const int penalty_factor= c->sub_penalty_factor;
283
    const int map_generation= c->map_generation;
284
    const int subpel_quality= c->avctx->me_subpel_quality;
285
    uint32_t *map= c->map;
286
    me_cmp_func cmpf, chroma_cmpf;
287
    me_cmp_func cmp_sub, chroma_cmp_sub;
288

    
289
    LOAD_COMMON
290
    int flags= c->sub_flags;
291

    
292
    cmpf= s->dsp.me_cmp[size];
293
    chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
294
 //FIXME factorize
295

    
296
    cmp_sub= s->dsp.me_sub_cmp[size];
297
    chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
298

    
299
    if(c->skip){ //FIXME somehow move up (benchmark)
300
        *mx_ptr = 0;
301
        *my_ptr = 0;
302
        return dmin;
303
    }
304

    
305
    if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
306
        dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
307
        if(mx || my || size>0)
308
            dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
309
    }
310

    
311
    if (mx > xmin && mx < xmax &&
312
        my > ymin && my < ymax) {
313
        int bx=4*mx, by=4*my;
314
        int d= dmin;
315
        int i, nx, ny;
316
        const int index= (my<<ME_MAP_SHIFT) + mx;
317
        const int t= score_map[(index-(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
318
        const int l= score_map[(index- 1                 )&(ME_MAP_SIZE-1)];
319
        const int r= score_map[(index+ 1                 )&(ME_MAP_SIZE-1)];
320
        const int b= score_map[(index+(1<<ME_MAP_SHIFT)  )&(ME_MAP_SIZE-1)];
321
        const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
322
        int best[8];
323
        int best_pos[8][2];
324

    
325
        memset(best, 64, sizeof(int)*8);
326
#if 1
327
        if(s->me.dia_size>=2){
328
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
329
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
330
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
331
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
332

    
333
            for(ny= -3; ny <= 3; ny++){
334
                for(nx= -3; nx <= 3; nx++){
335
                    //FIXME this could overflow (unlikely though)
336
                    const int64_t t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t;
337
                    const int64_t c2= nx*nx*( r +  l - 2*c) + 4*nx*( r- l) + 32*c;
338
                    const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
339
                    int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
340
                    int i;
341

    
342
                    if((nx&3)==0 && (ny&3)==0) continue;
343

    
344
                    score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
345

    
346
//                    if(nx&1) score-=1024*c->penalty_factor;
347
//                    if(ny&1) score-=1024*c->penalty_factor;
348

    
349
                    for(i=0; i<8; i++){
350
                        if(score < best[i]){
351
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
352
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
353
                            best[i]= score;
354
                            best_pos[i][0]= nx + 4*mx;
355
                            best_pos[i][1]= ny + 4*my;
356
                            break;
357
                        }
358
                    }
359
                }
360
            }
361
        }else{
362
            int tl;
363
            //FIXME this could overflow (unlikely though)
364
            const int cx = 4*(r - l);
365
            const int cx2= r + l - 2*c;
366
            const int cy = 4*(b - t);
367
            const int cy2= b + t - 2*c;
368
            int cxy;
369

    
370
            if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
371
                tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
372
            }else{
373
                tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
374
            }
375

    
376
            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
377

    
378
            assert(16*cx2 + 4*cx + 32*c == 32*r);
379
            assert(16*cx2 - 4*cx + 32*c == 32*l);
380
            assert(16*cy2 + 4*cy + 32*c == 32*b);
381
            assert(16*cy2 - 4*cy + 32*c == 32*t);
382
            assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
383

    
384
            for(ny= -3; ny <= 3; ny++){
385
                for(nx= -3; nx <= 3; nx++){
386
                    //FIXME this could overflow (unlikely though)
387
                    int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
388
                    int i;
389

    
390
                    if((nx&3)==0 && (ny&3)==0) continue;
391

    
392
                    score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
393
//                    if(nx&1) score-=32*c->penalty_factor;
394
  //                  if(ny&1) score-=32*c->penalty_factor;
395

    
396
                    for(i=0; i<8; i++){
397
                        if(score < best[i]){
398
                            memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
399
                            memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i));
400
                            best[i]= score;
401
                            best_pos[i][0]= nx + 4*mx;
402
                            best_pos[i][1]= ny + 4*my;
403
                            break;
404
                        }
405
                    }
406
                }
407
            }
408
        }
409
        for(i=0; i<subpel_quality; i++){
410
            nx= best_pos[i][0];
411
            ny= best_pos[i][1];
412
            CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2)
413
        }
414

    
415
#if 0
416
            const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
417
            const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
418
            const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
419
            const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
420
//            if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){
421
            if(tl<br){
422

423
//            nx= FFMAX(4*mx - bx, bx - 4*mx);
424
//            ny= FFMAX(4*my - by, by - 4*my);
425

426
            static int stats[7][7], count;
427
            count++;
428
            stats[4*mx - bx + 3][4*my - by + 3]++;
429
            if(256*256*256*64 % count ==0){
430
                for(i=0; i<49; i++){
431
                    if((i%7)==0) printf("\n");
432
                    printf("%6d ", stats[0][i]);
433
                }
434
                printf("\n");
435
            }
436
            }
437
#endif
438
#else
439

    
440
        CHECK_QUARTER_MV(2, 2, mx-1, my-1)
441
        CHECK_QUARTER_MV(0, 2, mx  , my-1)
442
        CHECK_QUARTER_MV(2, 2, mx  , my-1)
443
        CHECK_QUARTER_MV(2, 0, mx  , my  )
444
        CHECK_QUARTER_MV(2, 2, mx  , my  )
445
        CHECK_QUARTER_MV(0, 2, mx  , my  )
446
        CHECK_QUARTER_MV(2, 2, mx-1, my  )
447
        CHECK_QUARTER_MV(2, 0, mx-1, my  )
448

    
449
        nx= bx;
450
        ny= by;
451

    
452
        for(i=0; i<8; i++){
453
            int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
454
            int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
455
            CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2)
456
        }
457
#endif
458
#if 0
459
        //outer ring
460
        CHECK_QUARTER_MV(1, 3, mx-1, my-1)
461
        CHECK_QUARTER_MV(1, 2, mx-1, my-1)
462
        CHECK_QUARTER_MV(1, 1, mx-1, my-1)
463
        CHECK_QUARTER_MV(2, 1, mx-1, my-1)
464
        CHECK_QUARTER_MV(3, 1, mx-1, my-1)
465
        CHECK_QUARTER_MV(0, 1, mx  , my-1)
466
        CHECK_QUARTER_MV(1, 1, mx  , my-1)
467
        CHECK_QUARTER_MV(2, 1, mx  , my-1)
468
        CHECK_QUARTER_MV(3, 1, mx  , my-1)
469
        CHECK_QUARTER_MV(3, 2, mx  , my-1)
470
        CHECK_QUARTER_MV(3, 3, mx  , my-1)
471
        CHECK_QUARTER_MV(3, 0, mx  , my  )
472
        CHECK_QUARTER_MV(3, 1, mx  , my  )
473
        CHECK_QUARTER_MV(3, 2, mx  , my  )
474
        CHECK_QUARTER_MV(3, 3, mx  , my  )
475
        CHECK_QUARTER_MV(2, 3, mx  , my  )
476
        CHECK_QUARTER_MV(1, 3, mx  , my  )
477
        CHECK_QUARTER_MV(0, 3, mx  , my  )
478
        CHECK_QUARTER_MV(3, 3, mx-1, my  )
479
        CHECK_QUARTER_MV(2, 3, mx-1, my  )
480
        CHECK_QUARTER_MV(1, 3, mx-1, my  )
481
        CHECK_QUARTER_MV(1, 2, mx-1, my  )
482
        CHECK_QUARTER_MV(1, 1, mx-1, my  )
483
        CHECK_QUARTER_MV(1, 0, mx-1, my  )
484
#endif
485
        assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4);
486

    
487
        *mx_ptr = bx;
488
        *my_ptr = by;
489
    }else{
490
        *mx_ptr =4*mx;
491
        *my_ptr =4*my;
492
    }
493

    
494
    return dmin;
495
}
496

    
497

    
498
/*
 * Evaluate the full-pel candidate (x, y) unless the map already holds it for
 * the current generation. On a miss, the raw score is stored in score_map,
 * the map entry is claimed, and the score plus vector penalty is left in `d`;
 * if it beats `dmin`, then dmin/best[0]/best[1] are updated via COPY3_IF_LT.
 * (x, y) must lie inside [xmin,xmax] x [ymin,ymax] (asserted).
 *
 * Coordinates may be negative, so the key/index are built with unsigned
 * shifts and the penalty index with a multiplication: left-shifting a
 * negative signed value is undefined in C.
 *
 * Relies on map, score_map, map_generation, shift, d, dmin, best, s, size, h,
 * ref_index, src_index, cmpf, chroma_cmpf, flags, mv_penalty, pred_x, pred_y,
 * penalty_factor and the x/y range variables being in scope.
 */
#define CHECK_MV(x,y)\
{\
    const unsigned key= ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    assert((x) >= xmin);\
    assert((x) <= xmax);\
    assert((y) >= ymin);\
    assert((y) <= ymax);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
    }\
}
516

    
517
/*
 * Clamp (ax, ay) into [xmin,xmax] x [ymin,ymax] and hand the clamped
 * position to CHECK_MV. The arguments are copied into locals first so each
 * is evaluated once even though FFMAX/FFMIN may expand their operands more
 * than once.
 */
#define CHECK_CLIPPED_MV(ax,ay)\
{\
    const int mvx_raw= ax;\
    const int mvy_raw= ay;\
    const int mvx_clip= FFMAX(xmin, FFMIN(mvx_raw, xmax));\
    const int mvy_clip= FFMAX(ymin, FFMIN(mvy_raw, ymax));\
    CHECK_MV(mvx_clip, mvy_clip)\
}
525

    
526
/*
 * Like CHECK_MV but without range assertions, and additionally recording in
 * `next_dir` the direction code (new_dir) of a candidate that improved dmin.
 * On a map miss the raw score is stored in score_map, the map entry is
 * claimed and the score plus vector penalty is left in `d`.
 *
 * Coordinates may be negative, so the key/index are built with unsigned
 * shifts and the penalty index with a multiplication: left-shifting a
 * negative signed value is undefined in C.
 *
 * Relies on map, score_map, map_generation, shift, d, dmin, best, next_dir,
 * s, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags, mv_penalty,
 * pred_x, pred_y and penalty_factor being in scope.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const unsigned key= ((unsigned)(y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    const int index= (((unsigned)(y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((x)*(1<<shift))-pred_x] + mv_penalty[((y)*(1<<shift))-pred_y])*penalty_factor;\
        if(d<dmin){\
            best[0]=x;\
            best[1]=y;\
            dmin=d;\
            next_dir= new_dir;\
        }\
    }\
}
545

    
546
/*
 * Debug helper: print a diagnostic for every bound of the search range,
 * scaled by 2^S, that (x, y) violates. `v` is appended to the message as text.
 * Relies on xmin, xmax, ymin, ymax and s being in scope.
 *
 * The four tests are wrapped in one block so the macro behaves as a single
 * statement (e.g. as the body of an if), the bounds are scaled by
 * multiplication because they may be negative, and the definition no longer
 * ends in a line continuation.
 */
#define check(x,y,S,v)\
{\
    if( (x)<(xmin*(1<<(S))) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
    if( (x)>(xmax*(1<<(S))) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
    if( (y)<(ymin*(1<<(S))) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
    if( (y)>(ymax*(1<<(S))) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
}
551

    
552
/*
 * Declare the additional locals needed by the full-pel search routines:
 * the visited-position map, the quarter-pel flag taken from `flags`, and the
 * resulting shift (1 without FLAG_QPEL, 1 + qpel with it) used to scale
 * full-pel coordinates when indexing mv_penalty.
 * Expects `c` and `flags` in scope. The expansion ends with a ';' and the
 * definition deliberately carries no trailing line continuation.
 */
#define LOAD_COMMON2\
    uint32_t *map= c->map;\
    const int qpel= flags&FLAG_QPEL;\
    const int shift= 1+qpel;
556

    
557
/**
 * Iterative 4-neighbour ("small diamond") full-pel search.
 *
 * Starting from best[], the left/up/right/down neighbours are tested with
 * CHECK_MV_DIR; whenever one improves the score the search continues from
 * there, skipping the neighbour it just came from. It stops when no
 * neighbour improves.
 *
 * @param best  in: start position, out: best position found (full-pel)
 * @param dmin  score of the start position
 * @return the best score found
 */
static av_always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    int next_dir=-1;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
        const int start_key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
        const int start_slot= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
        if(map[start_slot]!=start_key){ //this will be executed only very rarely
            score_map[start_slot]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
            map[start_slot]= start_key;
        }
    }

    do{
        int d;
        const int came_from= next_dir;   /* direction of the previous step, -1 initially */
        const int cx= best[0];
        const int cy= best[1];
        next_dir=-1;

        /* direction codes: 0 = left, 1 = up, 2 = right, 3 = down;
           the position we arrived from is not re-tested */
        if(came_from!=2 && cx>xmin) CHECK_MV_DIR(cx-1, cy  , 0)
        if(came_from!=3 && cy>ymin) CHECK_MV_DIR(cx  , cy-1, 1)
        if(came_from!=0 && cx<xmax) CHECK_MV_DIR(cx+1, cy  , 2)
        if(came_from!=1 && cy<ymax) CHECK_MV_DIR(cx  , cy+1, 3)
    }while(next_dir!=-1);

    return dmin;
}
598

    
599
/**
 * Diamond search with power-of-two sizes 1, 2 and 4.
 *
 * For each size, points on the diamond around the current best are tested
 * with CHECK_MV (every second point per edge). A size is skipped when the
 * diamond would leave the search range. Whenever the best position changed,
 * the search restarts from size 1.
 *
 * @param best  in: start position, out: best position found (full-pel)
 * @param dmin  score of the start position
 * @return the best score found
 */
static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf= s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    for(dia_size=1; dia_size<=4; dia_size++){
        int dir;
        const int x= best[0];
        const int y= best[1];

        /* only powers of two are used as diamond size */
        if(dia_size&(dia_size-1)) continue;

        /* CHECK_MV requires in-range positions, so skip diamonds that do not fit */
        if(   x + dia_size > xmax
           || x - dia_size < xmin
           || y + dia_size > ymax
           || y - dia_size < ymin)
           continue;

        for(dir= 0; dir<dia_size; dir+=2){
            int d;

            CHECK_MV(x + dir           , y + dia_size - dir);
            CHECK_MV(x + dia_size - dir, y - dir           );
            CHECK_MV(x - dir           , y - dia_size + dir);
            CHECK_MV(x - dia_size + dir, y + dir           );
        }

        /* best moved: the loop increment turns 0 into 1, restarting at the smallest size */
        if(x!=best[0] || y!=best[1])
            dia_size=0;
    }
    return dmin;
}
659

    
660
/**
 * Hexagon-shaped full-pel search.
 *
 * For the current dia_size, the hexagon points around the best position are
 * tested (clipped to the search range) and the step is repeated until the
 * best position stops moving. dia_size is then reduced: halved if the
 * initial dia_size was a power of two, otherwise decremented by one, until
 * it reaches zero.
 *
 * @param best      in: start position, out: best position found (full-pel)
 * @param dmin      score of the start position
 * @param dia_size  initial hexagon size
 * @return the best score found
 */
static int hex_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags, int dia_size)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int cx, cy, d;
    /* non-zero when the initial size is not a power of two */
    const int dec= dia_size & (dia_size-1);

    while(dia_size){
        do{
            cx= best[0];
            cy= best[1];

            CHECK_CLIPPED_MV(cx  -dia_size    , cy);
            CHECK_CLIPPED_MV(cx+  dia_size    , cy);
            CHECK_CLIPPED_MV(cx+( dia_size>>1), cy+dia_size);
            CHECK_CLIPPED_MV(cx+( dia_size>>1), cy-dia_size);
            if(dia_size>1){
                CHECK_CLIPPED_MV(cx+(-dia_size>>1), cy+dia_size);
                CHECK_CLIPPED_MV(cx+(-dia_size>>1), cy-dia_size);
            }
        }while(best[0] != cx || best[1] != cy);

        if(dec)
            dia_size--;
        else
            dia_size>>=1;
    }

    return dmin;
}
693

    
694
/**
 * Large-to-small diamond search.
 *
 * An 8-point diamond pattern scaled by dia_size (low 8 bits of c->dia_size)
 * is tested around the best position until the best stops moving; dia_size
 * is then halved (if the initial value was a power of two) or decremented,
 * down to zero. Finally the four direct neighbours of the result are tested.
 *
 * @param best  in: start position, out: best position found (full-pel)
 * @param dmin  score of the start position
 * @return the best score found
 */
static int l2s_dia_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int cx, cy, i, d;
    int dia_size= c->dia_size&0xFF;
    /* non-zero when the initial size is not a power of two */
    const int dec= dia_size & (dia_size-1);
    static const int pattern[8][2]={{-2, 0}, {-1,-1}, { 0,-2}, { 1,-1},
                                    { 2, 0}, { 1, 1}, { 0, 2}, {-1, 1}};

    while(dia_size){
        do{
            cx= best[0];
            cy= best[1];
            for(i=0; i<8; i++){
                CHECK_CLIPPED_MV(cx+pattern[i][0]*dia_size, cy+pattern[i][1]*dia_size);
            }
        }while(best[0] != cx || best[1] != cy);

        if(dec)
            dia_size--;
        else
            dia_size>>=1;
    }

    /* final pass over the direct neighbours of the result */
    cx= best[0];
    cy= best[1];
    CHECK_CLIPPED_MV(cx+1, cy);
    CHECK_CLIPPED_MV(cx, cy+1);
    CHECK_CLIPPED_MV(cx-1, cy);
    CHECK_CLIPPED_MV(cx, cy-1);

    return dmin;
}
731

    
732
/**
 * Multi-stage full-pel search:
 *  1. a horizontal and a vertical line through the start position, sampled
 *     every second point (vertical extent is half the horizontal one),
 *  2. the full 5x5 square around the best position so far,
 *  3. a 16-point pattern scaled by 1 .. dia_size/4, centred on the position
 *     that was best before step 2,
 *  4. hex_search() with size 2 from the best position found.
 * dia_size is c->dia_size with bit 0 cleared and limited to 8 bits.
 *
 * @param best  in: start position, out: best position found (full-pel)
 * @param dmin  score of the start position
 * @return the best score found
 */
static int umh_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int cx, cy, px, py, i, j, d;
    int lo, hi, row_lo, row_hi;
    const int dia_size= c->dia_size&0xFE;
    static const int pattern[16][2]={{-4,-2}, {-4,-1}, {-4, 0}, {-4, 1}, {-4, 2},
                                     { 4,-2}, { 4,-1}, { 4, 0}, { 4, 1}, { 4, 2},
                                     {-2, 3}, { 0, 4}, { 2, 3},
                                     {-2,-3}, { 0,-4}, { 2,-3},};

    cx= best[0];
    cy= best[1];

    /* horizontal line, every second position */
    lo= FFMAX(cx-dia_size+1, xmin);
    hi= FFMIN(cx+dia_size-1, xmax);
    for(px=lo; px<=hi; px+=2){
        CHECK_MV(px, cy);
    }

    /* vertical line, every second position, half the extent */
    lo= FFMAX(cy-dia_size/2+1, ymin);
    hi= FFMIN(cy+dia_size/2-1, ymax);
    for(py=lo; py<=hi; py+=2){
        CHECK_MV(cx, py);
    }

    /* 5x5 square around the best position so far */
    cx= best[0];
    cy= best[1];
    row_lo= FFMAX(cy-2, ymin);
    row_hi= FFMIN(cy+2, ymax);
    lo= FFMAX(cx-2, xmin);
    hi= FFMIN(cx+2, xmax);
    for(py=row_lo; py<=row_hi; py++){
        for(px=lo; px<=hi; px++){
            CHECK_MV(px, py);
        }
    }

//FIXME prevent the CLIP stuff

    /* growing 16-point pattern; cx/cy are intentionally not refreshed here */
    for(j=1; j<=dia_size/4; j++){
        for(i=0; i<16; i++){
            CHECK_CLIPPED_MV(cx+pattern[i][0]*j, cy+pattern[i][1]*j);
        }
    }

    return hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, 2);
}
778

    
779
/**
 * Exhaustive full-pel search over the square [-dia_size, dia_size]^2
 * (dia_size = low 8 bits of c->dia_size), intersected with the search range.
 *
 * After the scan, the best position and its four neighbours are passed
 * through CHECK_CLIPPED_MV, and best[] is then restored to the position that
 * was best after the scan.
 *
 * NOTE(review): the function returns `d`, not `dmin`. `d` is overwritten by
 * any of the five trailing checks that misses the map, so the returned value
 * may be the score of a neighbour rather than of best[]. Presumably the
 * trailing checks exist only to populate the map for sub-pel refinement;
 * confirm before changing.
 *
 * @param best  in: start position, out: best position found by the scan
 * @param dmin  score of the start position
 * @return see the note above
 */
static int full_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;
    int px, py, d;
    const int dia_size= c->dia_size&0xFF;
    const int col_lo= FFMAX(-dia_size, xmin);
    const int col_hi= FFMIN(dia_size, xmax);
    const int row_lo= FFMAX(-dia_size, ymin);
    const int row_hi= FFMIN(dia_size, ymax);

    for(py=row_lo; py<=row_hi; py++){
        for(px=col_lo; px<=col_hi; px++){
            CHECK_MV(px, py);
        }
    }

    px= best[0];
    py= best[1];
    d= dmin;
    CHECK_CLIPPED_MV(px  , py);
    CHECK_CLIPPED_MV(px+1, py);
    CHECK_CLIPPED_MV(px, py+1);
    CHECK_CLIPPED_MV(px-1, py);
    CHECK_CLIPPED_MV(px, py-1);
    /* undo any move of best[] caused by the five checks above */
    best[0]= px;
    best[1]= py;

    return d;
}
813

    
814
/*
 * Evaluate the full-pel candidate (ax, ay) unless the map already holds it
 * for the current generation. On a miss, the raw score is stored in
 * score_map and the score plus vector penalty is left in `d`. If that beats
 * the worst entry of the ascending minima[] list, the candidate is inserted
 * at its sorted position (the last entry is dropped), marked unchecked, and
 * the enclosing loop is restarted by setting i to -1 and executing `continue`.
 * Must therefore be used directly inside a loop whose counter is `i`.
 *
 * Coordinates may be negative, so the key/index are built with unsigned
 * shifts and the penalty index with a multiplication: left-shifting a
 * negative signed value is undefined in C.
 *
 * Relies on map, score_map, map_generation, shift, d, i, minima,
 * minima_count, s, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags,
 * mv_penalty, pred_x, pred_y and penalty_factor being in scope.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const unsigned key= ((unsigned)(ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    const int index= (((unsigned)(ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    if(map[index]!=key){\
        d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
        map[index]= key;\
        score_map[index]= d;\
        d += (mv_penalty[((ax)*(1<<shift))-pred_x] + mv_penalty[((ay)*(1<<shift))-pred_y])*penalty_factor;\
        if(d < minima[minima_count-1].height){\
            int j=0;\
            \
            while(d >= minima[j].height) j++;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].checked= 0;\
            minima[j].height= d;\
            minima[j].x= ax;\
            minima[j].y= ay;\
            \
            i=-1;\
            continue;\
        }\
    }\
}
842

    
843
#define MAX_SAB_SIZE ME_MAP_SIZE
844
static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
845
                                       int src_index, int ref_index, int const penalty_factor,
846
                                       int size, int h, int flags)
847
{
848
    MotionEstContext * const c= &s->me;
849
    me_cmp_func cmpf, chroma_cmpf;
850
    Minima minima[MAX_SAB_SIZE];
851
    const int minima_count= FFABS(c->dia_size);
852
    int i, j;
853
    LOAD_COMMON
854
    LOAD_COMMON2
855
    int map_generation= c->map_generation;
856

    
857
    cmpf= s->dsp.me_cmp[size];
858
    chroma_cmpf= s->dsp.me_cmp[size+1];
859

    
860
    /*Note j<MAX_SAB_SIZE is needed if MAX_SAB_SIZE < ME_MAP_SIZE as j can
861
      become larger due to MVs overflowing their ME_MAP_MV_BITS bits space in map
862
     */
863
    for(j=i=0; i<ME_MAP_SIZE && j<MAX_SAB_SIZE; i++){
864
        uint32_t key= map[i];
865

    
866
        key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
867

    
868
        if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
869

    
870
        minima[j].height= score_map[i];
871
        minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
872
        minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
873
        minima[j].x-= (1<<(ME_MAP_MV_BITS-1));
874
        minima[j].y-= (1<<(ME_MAP_MV_BITS-1));
875

    
876
        // all entries in map should be in range except if the mv overflows their ME_MAP_MV_BITS bits space
877
        if(   minima[j].x > xmax || minima[j].x < xmin
878
           || minima[j].y > ymax || minima[j].y < ymin)
879
            continue;
880

    
881
        minima[j].checked=0;
882
        if(minima[j].x || minima[j].y)
883
            minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
884

    
885
        j++;
886
    }
887

    
888
    qsort(minima, j, sizeof(Minima), minima_cmp);
889

    
890
    for(; j<minima_count; j++){
891
        minima[j].height=256*256*256*64;
892
        minima[j].checked=0;
893
        minima[j].x= minima[j].y=0;
894
    }
895

    
896
    for(i=0; i<minima_count; i++){
897
        const int x= minima[i].x;
898
        const int y= minima[i].y;
899
        int d;
900

    
901
        if(minima[i].checked) continue;
902

    
903
        if(   x >= xmax || x <= xmin
904
           || y >= ymax || y <= ymin)
905
           continue;
906

    
907
        SAB_CHECK_MV(x-1, y)
908
        SAB_CHECK_MV(x+1, y)
909
        SAB_CHECK_MV(x  , y-1)
910
        SAB_CHECK_MV(x  , y+1)
911

    
912
        minima[i].checked= 1;
913
    }
914

    
915
    best[0]= minima[0].x;
916
    best[1]= minima[0].y;
917
    dmin= minima[0].height;
918

    
919
    if(   best[0] < xmax && best[0] > xmin
920
       && best[1] < ymax && best[1] > ymin){
921
        int d;
922
        //ensure that the refernece samples for hpel refinement are in the map
923
        CHECK_MV(best[0]-1, best[1])
924
        CHECK_MV(best[0]+1, best[1])
925
        CHECK_MV(best[0], best[1]-1)
926
        CHECK_MV(best[0], best[1]+1)
927
    }
928
    return dmin;
929
}
930

    
931
/**
 * Diamond search with a growing radius.
 *
 * Starting at radius 1, every point with |dx| + |dy| == dia_size around the
 * current best vector is passed to CHECK_MV, clipped to the
 * [xmin,xmax] x [ymin,ymax] window. If best[] changed during a sweep the
 * radius falls back to 1, otherwise it grows by one; the search ends once a
 * sweep at radius c->dia_size leaves best[] unchanged.
 *
 * CHECK_MV is defined elsewhere in this file; it is expected to update
 * best[] and dmin when a candidate scores lower.
 *
 * @param best  in/out: start vector on entry, selected vector on return
 * @param dmin  score belonging to best[] on entry
 * @return the score of the vector left in best[]
 */
static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
                              int src_index, int ref_index, int const penalty_factor,
                              int size, int h, int flags)
{
    MotionEstContext * const c= &s->me;
    me_cmp_func cmpf, chroma_cmpf;
    int dia_size;
    LOAD_COMMON
    LOAD_COMMON2
    int map_generation= c->map_generation;

    cmpf       = s->dsp.me_cmp[size];
    chroma_cmpf= s->dsp.me_cmp[size+1];

    dia_size= 1;
    while(dia_size <= c->dia_size){
        /* centre of this sweep; best[] itself may move while we scan */
        const int x= best[0];
        const int y= best[1];
        int dir;
        int d;

        {   /* edge from (x, y+dia_size) towards (x+dia_size, y) */
            const int first= FFMAX(0, y + dia_size - ymax);
            const int limit= FFMIN(dia_size, xmax - x + 1);

            for(dir= first; dir<limit; dir++){
                CHECK_MV(x + dir           , y + dia_size - dir);
            }
        }

        {   /* edge from (x+dia_size, y) towards (x, y-dia_size) */
            const int first= FFMAX(0, x + dia_size - xmax);
            const int limit= FFMIN(dia_size, y - ymin + 1);

            for(dir= first; dir<limit; dir++){
                CHECK_MV(x + dia_size - dir, y - dir           );
            }
        }

        {   /* edge from (x, y-dia_size) towards (x-dia_size, y) */
            const int first= FFMAX(0, -y + dia_size + ymin );
            const int limit= FFMIN(dia_size, x - xmin + 1);

            for(dir= first; dir<limit; dir++){
                CHECK_MV(x - dir           , y - dia_size + dir);
            }
        }

        {   /* edge from (x-dia_size, y) towards (x, y+dia_size) */
            const int first= FFMAX(0, -x + dia_size + xmin );
            const int limit= FFMIN(dia_size, ymax - y + 1);

            for(dir= first; dir<limit; dir++){
                CHECK_MV(x - dia_size + dir, y + dir           );
            }
        }

        /* a better vector was found: start over with the smallest diamond */
        if(x!=best[0] || y!=best[1])
            dia_size= 1;
        else
            dia_size++;
    }
    return dmin;
}
1007

    
1008
/**
 * Select the integer-pel search routine according to s->me.dia_size and
 * forward all arguments to it.
 *
 *   dia_size == -1      funny_diamond_search
 *   dia_size <  -1      sab_diamond_search
 *   dia_size 0..1       small_diamond_search
 *   dia_size 2..256     var_diamond_search
 *   dia_size 257..512   l2s_dia_search
 *   dia_size 513..768   hex_search (additionally receives dia_size & 0xFF)
 *   dia_size 769..1024  umh_search
 *   dia_size > 1024     full_search
 *
 * @return whatever the selected search routine returns
 */
static av_always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
                                       int src_index, int ref_index, int const penalty_factor,
                                       int size, int h, int flags){
    const int dia_size= s->me.dia_size;

    /* negative values select the special searches */
    if(dia_size == -1)
        return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size < -1)
        return   sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size < 2)
        return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    /* from here on dia_size >= 2; test the bands from the top down */
    if(dia_size > 1024)
        return          full_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size > 768)
        return           umh_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
    if(dia_size > 512)
        return           hex_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags, dia_size&0xFF);
    if(dia_size > 256)
        return       l2s_dia_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    return   var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
}
1029

    
1030
/*!
1031
   \param P[10][2] a list of candidate mvs to check before starting the
1032
   iterative search. If one of the candidates is close to the optimal mv, then
1033
   it takes fewer iterations. And it increases the chance that we find the
1034
   optimal mv.
1035
 */
1036
static av_always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1037
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1038
                             int ref_mv_scale, int flags, int size, int h)
1039
{
1040
    MotionEstContext * const c= &s->me;
1041
    int best[2]={0, 0};      /*!< x and y coordinates of the best motion vector.
1042
                               i.e. the difference between the position of the
1043
                               block currently being encoded and the position of
1044
                               the block chosen to predict it from. */
1045
    int d;                   ///< the score (cmp + penalty) of any given mv
1046
    int dmin;                /*!< the best value of d, i.e. the score
1047
                               corresponding to the mv stored in best[]. */
1048
    int map_generation;
1049
    int penalty_factor;
1050
    const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
1051
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
1052
    me_cmp_func cmpf, chroma_cmpf;
1053

    
1054
    LOAD_COMMON
1055
    LOAD_COMMON2
1056

    
1057
    if(c->pre_pass){
1058
        penalty_factor= c->pre_penalty_factor;
1059
        cmpf= s->dsp.me_pre_cmp[size];
1060
        chroma_cmpf= s->dsp.me_pre_cmp[size+1];
1061
    }else{
1062
        penalty_factor= c->penalty_factor;
1063
        cmpf= s->dsp.me_cmp[size];
1064
        chroma_cmpf= s->dsp.me_cmp[size+1];
1065
    }
1066

    
1067
    map_generation= update_map_generation(c);
1068

    
1069
    assert(cmpf);
1070
    dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
1071
    map[0]= map_generation;
1072
    score_map[0]= dmin;
1073

    
1074
    //FIXME precalc first term below?
1075
    if((s->pict_type == FF_B_TYPE && !(c->flags & FLAG_DIRECT)) || s->flags&CODEC_FLAG_MV0)
1076
        dmin += (mv_penalty[pred_x] + mv_penalty[pred_y])*penalty_factor;
1077

    
1078
    /* first line */
1079
    if (s->first_slice_line) {
1080
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1081
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1082
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1083
    }else{
1084
        if(dmin<((h*h*s->avctx->mv0_threshold)>>8)
1085
                    && ( P_LEFT[0]    |P_LEFT[1]
1086
                        |P_TOP[0]     |P_TOP[1]
1087
                        |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
1088
            *mx_ptr= 0;
1089
            *my_ptr= 0;
1090
            c->skip=1;
1091
            return dmin;
1092
        }
1093
        CHECK_MV(    P_MEDIAN[0] >>shift ,    P_MEDIAN[1] >>shift)
1094
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)-1)
1095
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)  , (P_MEDIAN[1]>>shift)+1)
1096
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)-1, (P_MEDIAN[1]>>shift)  )
1097
        CHECK_CLIPPED_MV((P_MEDIAN[0]>>shift)+1, (P_MEDIAN[1]>>shift)  )
1098
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1099
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1100
        CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
1101
        CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
1102
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1103
    }
1104
    if(dmin>h*h*4){
1105
        if(c->pre_pass){
1106
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
1107
                            (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
1108
            if(!s->first_slice_line)
1109
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1110
                                (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1111
        }else{
1112
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1113
                            (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1114
            if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1115
                CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1116
                                (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1117
        }
1118
    }
1119

    
1120
    if(c->avctx->last_predictor_count){
1121
        const int count= c->avctx->last_predictor_count;
1122
        const int xstart= FFMAX(0, s->mb_x - count);
1123
        const int ystart= FFMAX(0, s->mb_y - count);
1124
        const int xend= FFMIN(s->mb_width , s->mb_x + count + 1);
1125
        const int yend= FFMIN(s->mb_height, s->mb_y + count + 1);
1126
        int mb_y;
1127

    
1128
        for(mb_y=ystart; mb_y<yend; mb_y++){
1129
            int mb_x;
1130
            for(mb_x=xstart; mb_x<xend; mb_x++){
1131
                const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride;
1132
                int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16;
1133
                int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16;
1134

    
1135
                if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue;
1136
                CHECK_MV(mx,my)
1137
            }
1138
        }
1139
    }
1140

    
1141
//check(best[0],best[1],0, b0)
1142
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1143

    
1144
//check(best[0],best[1],0, b1)
1145
    *mx_ptr= best[0];
1146
    *my_ptr= best[1];
1147

    
1148
//    printf("%d %d %d \n", best[0], best[1], dmin);
1149
    return dmin;
1150
}
1151

    
1152
//this function is dedicated to the braindamaged gcc
1153
inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1154
                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1155
                             int ref_mv_scale, int size, int h)
1156
{
1157
    MotionEstContext * const c= &s->me;
1158
//FIXME convert other functions in the same way if faster
1159
    if(c->flags==0 && h==16 && size==0){
1160
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16);
1161
//    case FLAG_QPEL:
1162
//        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
1163
    }else{
1164
        return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h);
1165
    }
1166
}
1167

    
1168
/**
 * EPZS integer-pel search with fixed size index 1 and h == 8
 * (presumably the 8x8 blocks of 4MV mode -- confirm against the callers).
 *
 * Checks a fixed sequence of predictor candidates, then refines the best
 * one with diamond_search().
 *
 * @param mx_ptr       output: x component of the selected vector
 * @param my_ptr       output: y component of the selected vector
 * @param P            candidate vectors, accessed through the P_* macros
 * @param src_index    forwarded to the comparison
 * @param ref_index    forwarded to the comparison
 * @param last_mv      table of [x, y] vectors indexed by mb_x + mb_y*mb_stride
 * @param ref_mv_scale multiplier applied to last_mv entries; the product is
 *                     rounded and shifted right by 16
 * @return the score of the selected vector
 */
static int epzs_motion_search4(MpegEncContext * s,
                             int *mx_ptr, int *my_ptr, int P[10][2],
                             int src_index, int ref_index, int16_t (*last_mv)[2],
                             int ref_mv_scale)
{
    MotionEstContext * const c= &s->me;
    const int size= 1;
    const int h= 8;
    const int penalty_factor= c->penalty_factor;
    const int ref_mv_stride= s->mb_stride;
    const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride;
    me_cmp_func cmpf= s->dsp.me_cmp[size];
    me_cmp_func chroma_cmpf= s->dsp.me_cmp[size+1];
    int best[2]={0, 0};
    int dmin= 1000000;       // start value that any checked candidate should beat
    int d;
    int map_generation;
    LOAD_COMMON
    int flags= c->flags;     // must be declared before LOAD_COMMON2, which reads it
    LOAD_COMMON2

    map_generation= update_map_generation(c);

    if(!s->first_slice_line){
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
        //FIXME try some early stop
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
    }else{
        /* first line: the top and top-right candidates are not checked */
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
    }

    /* still a poor match: try the next last_mv entry and the one a stride later */
    if(dmin > 64*4){
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
        if(s->mb_y+1 < s->end_mb_y){  //FIXME replace at least with last_slice_line
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
        }
    }

    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);

    *mx_ptr= best[0];
    *my_ptr= best[1];

    return dmin;
}
1226

    
1227
//try to merge with above FIXME (needs PSNR test)
1228
static int epzs_motion_search2(MpegEncContext * s,
1229
                             int *mx_ptr, int *my_ptr, int P[10][2],
1230
                             int src_index, int ref_index, int16_t (*last_mv)[2],
1231
                             int ref_mv_scale)
1232
{
1233
    MotionEstContext * const c= &s->me;
1234
    int best[2]={0, 0};
1235
    int d, dmin;
1236
    int map_generation;
1237
    const int penalty_factor= c->penalty_factor;
1238
    const int size=0; //FIXME pass as arg
1239
    const int h=8;
1240
    const int ref_mv_stride= s->mb_stride;
1241
    const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1242
    me_cmp_func cmpf, chroma_cmpf;
1243
    LOAD_COMMON
1244
    int flags= c->flags;
1245
    LOAD_COMMON2
1246

    
1247
    cmpf= s->dsp.me_cmp[size];
1248
    chroma_cmpf= s->dsp.me_cmp[size+1];
1249

    
1250
    map_generation= update_map_generation(c);
1251

    
1252
    dmin = 1000000;
1253
//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1254
    /* first line */
1255
    if (s->first_slice_line) {
1256
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1257
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1258
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1259
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1260
    }else{
1261
        CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
1262
        //FIXME try some early stop
1263
        CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
1264
        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
1265
        CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
1266
        CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
1267
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
1268
                        (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
1269
    }
1270
    if(dmin>64*4){
1271
        CHECK_CLIPPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
1272
                        (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
1273
        if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
1274
            CHECK_CLIPPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1275
                            (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1276
    }
1277

    
1278
    dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1279

    
1280
    *mx_ptr= best[0];
1281
    *my_ptr= best[1];
1282

    
1283
//    printf("%d %d %d \n", best[0], best[1], dmin);
1284
    return dmin;
1285
}