Statistics
| Branch: | Revision:

ffmpeg / libavcodec / mpegvideo.c @ 1bd86246

History | View | Annotate | Download (241 KB)

1
/*
2
 * The simplest mpeg encoder (well, it was the simplest!)
3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21
 */
22

    
23
/**
24
 * @file mpegvideo.c
25
 * The simplest mpeg encoder (well, it was the simplest!).
26
 */
27

    
28
#include "avcodec.h"
29
#include "dsputil.h"
30
#include "mpegvideo.h"
31
#include "faandct.h"
32
#include <limits.h>
33

    
34
#ifdef USE_FASTMEMCPY
35
#include "fastmemcpy.h"
36
#endif
37

    
38
//#undef NDEBUG
39
//#include <assert.h>
40

    
41
#ifdef CONFIG_ENCODERS
42
static void encode_picture(MpegEncContext *s, int picture_number);
43
#endif //CONFIG_ENCODERS
44
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45
                                   DCTELEM *block, int n, int qscale);
46
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47
                                   DCTELEM *block, int n, int qscale);
48
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49
                                   DCTELEM *block, int n, int qscale);
50
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51
                                   DCTELEM *block, int n, int qscale);
52
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53
                                  DCTELEM *block, int n, int qscale);
54
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55
                                  DCTELEM *block, int n, int qscale);
56
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57
#ifdef CONFIG_ENCODERS
58
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61
static int sse_mb(MpegEncContext *s);
62
static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63
#endif //CONFIG_ENCODERS
64

    
65
#ifdef HAVE_XVMC
66
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67
extern void XVMC_field_end(MpegEncContext *s);
68
extern void XVMC_decode_mb(MpegEncContext *s);
69
#endif
70

    
71
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72

    
73

    
74
/* enable all paranoid tests for rounding, overflows, etc... */
75
//#define PARANOID
76

    
77
//#define DEBUG
78

    
79

    
80
/* for jpeg fast DCT */
81
#define CONST_BITS 14
82

    
83
/* AAN DCT post-scale factors, one per coefficient position, scaled up by
 * 14 bits (CONST_BITS).  Used when the fast (ifast / FAAN post-scaled) FDCT
 * is selected, to fold the DCT's non-unit gain into the quantizer matrices. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};

/* rounding table for h263-style chroma motion vectors, indexed by the
 * fractional part of the averaged luma vectors */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};

/* identity luma->chroma qscale mapping (chroma qscale == luma qscale) */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};

#ifdef CONFIG_ENCODERS
/* lazily allocated in MPV_encode_defaults(); shared by all encoder contexts */
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
static uint8_t default_fcode_tab[MAX_MV*2+1];

enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110

    
111
/**
 * Builds per-qscale quantizer tables from a quant matrix.
 * Fix vs. original: corrected the "larger then" typo in the overflow warning.
 *
 * @param dsp          provides the chosen fdct (selects the scaling variant)
 *                     and the idct coefficient permutation
 * @param qmat         output: 32-bit multipliers, indexed [qscale][coef]
 * @param qmat16       output: 16-bit multiplier/bias pairs for the MMX path
 *                     (only filled in the generic-fdct branch)
 * @param quant_matrix input quantization matrix (raster order)
 * @param bias         quantizer rounding bias (QUANT_BIAS_SHIFT fixed point)
 * @param qmin,qmax    inclusive qscale range to precompute
 * @param intra        1 to skip the DC coefficient in the overflow scan
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            /* unit-gain FDCT: reciprocal of qscale*matrix only */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            /* AAN FDCT leaves a per-coefficient gain; divide it out here */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            /* generic FDCT: also fill the 16-bit tables used by MMX code */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* clamp to avoid 0 or 2^15 multipliers which the MMX code
                   cannot represent */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* find how many extra shift bits would be needed so that
           max_coeff * qmat never overflows an int */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
184

    
185
/* derives qscale (clipped to the codec's qmin..qmax) and lambda2 from the
 * current lagrange multiplier lambda */
static inline void update_qscale(MpegEncContext *s){
    const int q = (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);

    s->qscale = clip(q, s->avctx->qmin, s->avctx->qmax);
    s->lambda2 = (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
}
191
#endif //CONFIG_ENCODERS
192

    
193
/* Fills a ScanTable: applies the IDCT coefficient permutation to the given
 * scan order and precomputes, for every scan position, the highest permuted
 * index seen so far (raster_end, used for partial-IDCT optimizations). */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int idx, last;

    st->scantable = src_scantable;

    for(idx=0; idx<64; idx++){
        const int pos = src_scantable[idx];
        st->permutated[idx] = permutation[pos];
#ifdef ARCH_POWERPC
        st->inverse[pos] = idx;
#endif
    }

    last = -1;
    for(idx=0; idx<64; idx++){
        const int pos = st->permutated[idx];
        if(pos > last)
            last = pos;
        st->raster_end[idx] = last;
    }
}
216

    
217
#ifdef CONFIG_ENCODERS
218
/* Writes a quant matrix to the bitstream: a 1 bit followed by the 64 values
 * in zigzag order, or a single 0 bit when no custom matrix is present. */
void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
    int k;

    if(!matrix){
        put_bits(pb, 1, 0);
        return;
    }

    put_bits(pb, 1, 1);
    for(k=0; k<64; k++)
        put_bits(pb, 8, matrix[ ff_zigzag_direct[k] ]);
}
229
#endif //CONFIG_ENCODERS
230

    
231
/**
 * Scans [p, end) for an MPEG start code (0x000001XX).
 * *state carries the last bytes seen across calls, so a start code split
 * between two buffers is still found.
 * @return pointer just past the start code (its XX byte), or end if none
 *         was completed in this buffer; *state then holds the trailing bytes.
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first check bytes that complete a start code begun in *state */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* skip ahead by up to 3 bytes at a time: a 0x000001 sequence cannot
       start under the tested bytes if they are non-zero */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* reload *state with the last 4 bytes so the next call can resume */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
260

    
261
/* init common dct for both encoder and decoder */
262
/**
 * Initializes the (un)quantizer function pointers and scan tables shared by
 * encoder and decoder.  Installs the C reference implementations first, then
 * lets the arch-specific init functions override them.
 * @return 0 (always succeeds)
 */
int DCT_common_init(MpegEncContext *s)
{
    /* C reference unquantizers; may be replaced by the arch inits below */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* arch-specific overrides (MMX, Alpha, mlib, MMI, ARMv4l, PPC) */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the (possibly arch-accelerated) quantizer as the fast variant
       before optionally switching the main one to trellis */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
319

    
320
/* shallow-copies a Picture and marks the copy so it is never passed to
 * release_buffer() */
static void copy_picture(Picture *dst, Picture *src){
    memcpy(dst, src, sizeof(*dst));
    dst->type = FF_BUFFER_TYPE_COPY;
}
324

    
325
/**
 * Copies frame-level attributes (type, quality, numbers, pts, interlacing
 * flags) from src to dst.  When me_threshold is enabled, additionally copies
 * the mb_type, motion_val and ref_index side tables; pixel data is untouched.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* missing side tables are logged but not treated as fatal here;
           the memcpys below would then read through NULL — callers are
           expected to have set them up */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* one motion_val/ref_index table pair per prediction direction */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
363

    
364
/**
365
 * allocates a Picture
366
 * The pixels are allocated/set by calling get_buffer() if shared=0
367
 */
368
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0
 * Also allocates the per-picture side tables (qscale, mb_type, motion
 * vectors, ...) on first use, and updates the pict_type history used for
 * the skipped-MB "age" heuristic.
 * @return 0 on success, -1 on allocation/validation failure
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* caller provides the pixel buffers; just tag the picture */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant for the lifetime of the context */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* side tables are allocated once per Picture; qscale_table doubles as
       the "already initialized" flag.  CHECKED_ALLOCZ jumps to fail: on OOM */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264 stores motion vectors at 4x4 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* other codecs (and debug visualization) use 8x8 granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
448

    
449
/**
450
 * deallocates a picture
451
 */
452
/**
 * deallocates a picture
 * Releases the pixel buffers through release_buffer() (unless they are
 * shared/caller-owned) and frees all per-picture side tables.
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    /* av_freep() also NULLs the pointers, so double frees are safe */
    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL; // derived from mb_type_base, must not be freed itself
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* shared pictures don't own their data; just drop the references */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
481

    
482
/**
 * Allocates the per-thread scratch buffers (edge emulation, ME scratchpads,
 * ME maps, DCT blocks) of a duplicated context.
 * @param base currently unused; sizes come from s itself
 * @return 0 on success, -1 on allocation failure (cleanup is deferred to
 *         MPV_common_end())
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads alias the same allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks point into the block array; re-derived after context copies */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
511

    
512
/* Frees the per-thread scratch buffers allocated by init_duplicate_context()
 * and clears the derived pointers that aliased them.  NULL-safe. */
static void free_duplicate_context(MpegEncContext *s){
    if(!s)
        return;

    av_freep(&s->allocated_edge_emu_buffer);
    s->edge_emu_buffer = NULL;

    av_freep(&s->me.scratchpad);
    /* all three aliased the scratchpad allocation */
    s->rd_scratchpad = NULL;
    s->b_scratchpad = NULL;
    s->obmc_scratchpad = NULL;

    av_freep(&s->dct_error_sum);
    av_freep(&s->me.map);
    av_freep(&s->me.score_map);
    av_freep(&s->blocks);
    s->block = NULL;
}
527

    
528
/**
 * Saves the per-thread fields of src into bak.  Used by
 * ff_update_duplicate_context() to preserve a thread's private buffers and
 * slice range across a wholesale memcpy of the context.
 * The COPY list must stay in sync with the allocations made in
 * init_duplicate_context().
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
549

    
550
/**
 * Makes dst a copy of src while keeping dst's own per-thread buffers and
 * slice range (saved/restored via backup_duplicate_context), then re-derives
 * the pblocks pointers so they point into dst's block array rather than src's.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
563

    
564
/**
 * Propagates the fields decided during motion estimation / header encoding
 * (picture type, current picture, f/b codes, quantizer state, GOP counters,
 * frame structure flags) from the main context to a slice-thread context.
 */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
580

    
581
/**
582
 * sets the given MpegEncContext to common defaults (same for encoding and decoding).
583
 * the changed fields will not depend upon the prior state of the MpegEncContext.
584
 */
585
/**
 * sets the given MpegEncContext to common defaults (same for encoding and decoding).
 * the changed fields will not depend upon the prior state of the MpegEncContext.
 */
static void MPV_common_defaults(MpegEncContext *s){
    /* dc scale / chroma qscale tables */
    s->y_dc_scale_table = ff_mpeg1_dc_scale_table;
    s->c_dc_scale_table = ff_mpeg1_dc_scale_table;
    s->chroma_qscale_table = ff_default_chroma_qscale_table;

    /* frame structure */
    s->progressive_frame = 1;
    s->progressive_sequence = 1;
    s->picture_structure = PICT_FRAME;

    /* counters */
    s->coded_picture_number = 0;
    s->picture_number = 0;
    s->input_picture_number = 0;
    s->picture_in_gop_number = 0;

    /* motion vector range codes */
    s->f_code = 1;
    s->b_code = 1;
}
602

    
603
/**
604
 * sets the given MpegEncContext to defaults for decoding.
605
 * the changed fields will not depend upon the prior state of the MpegEncContext.
606
 */
607
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs no defaults beyond the common ones */
    MPV_common_defaults(s);
}
610

    
611
/**
612
 * sets the given MpegEncContext to defaults for encoding.
613
 * the changed fields will not depend upon the prior state of the MpegEncContext.
614
 */
615

    
616
#ifdef CONFIG_ENCODERS
617
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0; // one-time init of the shared tables below
                       // NOTE(review): not thread-safe if two encoders are
                       // opened concurrently — confirm callers serialize init

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        /* shared across all encoder instances; never freed */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers motion vectors in [-16, 15] */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
636
#endif //CONFIG_ENCODERS
637

    
638
/**
639
 * init common structure for both encoder and decoder.
640
 * this assumes that some variables like width/height are already set
641
 */
642
/**
 * init common structure for both encoder and decoder.
 * this assumes that some variables like width/height are already set
 * Computes macroblock geometry, allocates all shared tables (MV tables,
 * qmatrices, dc/ac prediction values, skip tables, ...) and sets up one
 * duplicated context per slice thread.
 * CHECKED_ALLOCZ jumps to fail: on OOM; cleanup happens in MPV_common_end().
 * @return 0 on success, -1 on error
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    s->mb_height = (s->height + 15) / 16;

    /* one slice thread needs at least one MB row */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock geometry; strides include one extra column for edge MBs */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* maps linear MB index -> position in the strided MB arrays */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* the usable tables start one row+column into the base allocation
           so that [-1] accesses at the picture edge stay in bounds */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantizer matrices for all 32 qscales (see convert_matrix) */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value (128 << 3) */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       /* debug visualization buffers; failure here is not checked (debug only) */
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 is the main context itself; others get full copies */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give each thread its scratch buffers and an MB-row slice range */
    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
830

    
831
/* init common structure for both encoder and decoder */
832
void MPV_common_end(MpegEncContext *s)
833
{
834
    int i, j, k;
835

    
836
    for(i=0; i<s->avctx->thread_count; i++){
837
        free_duplicate_context(s->thread_context[i]);
838
    }
839
    for(i=1; i<s->avctx->thread_count; i++){
840
        av_freep(&s->thread_context[i]);
841
    }
842

    
843
    av_freep(&s->parse_context.buffer);
844
    s->parse_context.buffer_size=0;
845

    
846
    av_freep(&s->mb_type);
847
    av_freep(&s->p_mv_table_base);
848
    av_freep(&s->b_forw_mv_table_base);
849
    av_freep(&s->b_back_mv_table_base);
850
    av_freep(&s->b_bidir_forw_mv_table_base);
851
    av_freep(&s->b_bidir_back_mv_table_base);
852
    av_freep(&s->b_direct_mv_table_base);
853
    s->p_mv_table= NULL;
854
    s->b_forw_mv_table= NULL;
855
    s->b_back_mv_table= NULL;
856
    s->b_bidir_forw_mv_table= NULL;
857
    s->b_bidir_back_mv_table= NULL;
858
    s->b_direct_mv_table= NULL;
859
    for(i=0; i<2; i++){
860
        for(j=0; j<2; j++){
861
            for(k=0; k<2; k++){
862
                av_freep(&s->b_field_mv_table_base[i][j][k]);
863
                s->b_field_mv_table[i][j][k]=NULL;
864
            }
865
            av_freep(&s->b_field_select_table[i][j]);
866
            av_freep(&s->p_field_mv_table_base[i][j]);
867
            s->p_field_mv_table[i][j]=NULL;
868
        }
869
        av_freep(&s->p_field_select_table[i]);
870
    }
871

    
872
    av_freep(&s->dc_val_base);
873
    av_freep(&s->ac_val_base);
874
    av_freep(&s->coded_block_base);
875
    av_freep(&s->mbintra_table);
876
    av_freep(&s->cbp_table);
877
    av_freep(&s->pred_dir_table);
878

    
879
    av_freep(&s->mbskip_table);
880
    av_freep(&s->prev_pict_types);
881
    av_freep(&s->bitstream_buffer);
882
    s->allocated_bitstream_buffer_size=0;
883

    
884
    av_freep(&s->avctx->stats_out);
885
    av_freep(&s->ac_stats);
886
    av_freep(&s->error_status_table);
887
    av_freep(&s->mb_index2xy);
888
    av_freep(&s->lambda_table);
889
    av_freep(&s->q_intra_matrix);
890
    av_freep(&s->q_inter_matrix);
891
    av_freep(&s->q_intra_matrix16);
892
    av_freep(&s->q_inter_matrix16);
893
    av_freep(&s->input_picture);
894
    av_freep(&s->reordered_input_picture);
895
    av_freep(&s->dct_offset);
896

    
897
    if(s->picture){
898
        for(i=0; i<MAX_PICTURE_COUNT; i++){
899
            free_picture(s, &s->picture[i]);
900
        }
901
    }
902
    av_freep(&s->picture);
903
    s->context_initialized = 0;
904
    s->last_picture_ptr=
905
    s->next_picture_ptr=
906
    s->current_picture_ptr= NULL;
907
    s->linesize= s->uvlinesize= 0;
908

    
909
    for(i=0; i<3; i++)
910
        av_freep(&s->visualization_buffer[i]);
911

    
912
    avcodec_default_free_buffers(s->avctx);
913
}
914

    
915
#ifdef CONFIG_ENCODERS
916

    
917
/* init video encoder */
918
int MPV_encode_init(AVCodecContext *avctx)
919
{
920
    MpegEncContext *s = avctx->priv_data;
921
    int i;
922
    int chroma_h_shift, chroma_v_shift;
923

    
924
    MPV_encode_defaults(s);
925

    
926
    if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
927
        av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
928
        return -1;
929
    }
930

    
931
    if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
932
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
933
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
934
            return -1;
935
        }
936
    }else{
937
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
938
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
939
            return -1;
940
        }
941
    }
942

    
943
    s->bit_rate = avctx->bit_rate;
944
    s->width = avctx->width;
945
    s->height = avctx->height;
946
    if(avctx->gop_size > 600){
947
        av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
948
        avctx->gop_size=600;
949
    }
950
    s->gop_size = avctx->gop_size;
951
    s->avctx = avctx;
952
    s->flags= avctx->flags;
953
    s->flags2= avctx->flags2;
954
    s->max_b_frames= avctx->max_b_frames;
955
    s->codec_id= avctx->codec->id;
956
    s->luma_elim_threshold  = avctx->luma_elim_threshold;
957
    s->chroma_elim_threshold= avctx->chroma_elim_threshold;
958
    s->strict_std_compliance= avctx->strict_std_compliance;
959
    s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
960
    s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
961
    s->mpeg_quant= avctx->mpeg_quant;
962
    s->rtp_mode= !!avctx->rtp_payload_size;
963
    s->intra_dc_precision= avctx->intra_dc_precision;
964
    s->user_specified_pts = AV_NOPTS_VALUE;
965

    
966
    if (s->gop_size <= 1) {
967
        s->intra_only = 1;
968
        s->gop_size = 12;
969
    } else {
970
        s->intra_only = 0;
971
    }
972

    
973
    s->me_method = avctx->me_method;
974

    
975
    /* Fixed QSCALE */
976
    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
977

    
978
    s->adaptive_quant= (   s->avctx->lumi_masking
979
                        || s->avctx->dark_masking
980
                        || s->avctx->temporal_cplx_masking
981
                        || s->avctx->spatial_cplx_masking
982
                        || s->avctx->p_masking
983
                        || s->avctx->border_masking
984
                        || (s->flags&CODEC_FLAG_QP_RD))
985
                       && !s->fixed_qscale;
986

    
987
    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
988
    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
989
    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
990

    
991
    if(avctx->rc_max_rate && !avctx->rc_buffer_size){
992
        av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
993
        return -1;
994
    }
995

    
996
    if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
997
        av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
998
    }
999

    
1000
    if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1001
        av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1002
        return -1;
1003
    }
1004

    
1005
    if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1006
        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1007
        return -1;
1008
    }
1009

    
1010
    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1011
       && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1012
       && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1013

    
1014
        av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1015
    }
1016

    
1017
    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1018
       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1019
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1020
        return -1;
1021
    }
1022

    
1023
    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1024
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1025
        return -1;
1026
    }
1027

    
1028
    if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1029
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1030
        return -1;
1031
    }
1032

    
1033
    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1034
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1035
        return -1;
1036
    }
1037

    
1038
    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1039
        av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1040
        return -1;
1041
    }
1042

    
1043
    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1044
        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1045
        return -1;
1046
    }
1047

    
1048
    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1049
       && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1050
        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1051
        return -1;
1052
    }
1053

    
1054
    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1055
        av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1056
        return -1;
1057
    }
1058

    
1059
    if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1060
        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1061
        return -1;
1062
    }
1063

    
1064
    if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1065
        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1066
        return -1;
1067
    }
1068

    
1069
    if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1070
        av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1071
        return -1;
1072
    }
1073

    
1074
    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1075
       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1076
       && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1077
        av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1078
        return -1;
1079
    }
1080

    
1081
    if(s->avctx->thread_count > 1)
1082
        s->rtp_mode= 1;
1083

    
1084
    if(!avctx->time_base.den || !avctx->time_base.num){
1085
        av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1086
        return -1;
1087
    }
1088

    
1089
    i= (INT_MAX/2+128)>>8;
1090
    if(avctx->me_threshold >= i){
1091
        av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1092
        return -1;
1093
    }
1094
    if(avctx->mb_threshold >= i){
1095
        av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1096
        return -1;
1097
    }
1098

    
1099
    if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1100
        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
1101
        return -1;
1102
    }
1103

    
1104
    i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1105
    if(i > 1){
1106
        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1107
        avctx->time_base.den /= i;
1108
        avctx->time_base.num /= i;
1109
//        return -1;
1110
    }
1111

    
1112
    if(s->codec_id==CODEC_ID_MJPEG){
1113
        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1114
        s->inter_quant_bias= 0;
1115
    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1116
        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1117
        s->inter_quant_bias= 0;
1118
    }else{
1119
        s->intra_quant_bias=0;
1120
        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1121
    }
1122

    
1123
    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1124
        s->intra_quant_bias= avctx->intra_quant_bias;
1125
    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1126
        s->inter_quant_bias= avctx->inter_quant_bias;
1127

    
1128
    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1129

    
1130
    if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1131
        av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1132
        return -1;
1133
    }
1134
    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1135

    
1136
    switch(avctx->codec->id) {
1137
    case CODEC_ID_MPEG1VIDEO:
1138
        s->out_format = FMT_MPEG1;
1139
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1140
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1141
        break;
1142
    case CODEC_ID_MPEG2VIDEO:
1143
        s->out_format = FMT_MPEG1;
1144
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1145
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1146
        s->rtp_mode= 1;
1147
        break;
1148
    case CODEC_ID_LJPEG:
1149
    case CODEC_ID_JPEGLS:
1150
    case CODEC_ID_MJPEG:
1151
        s->out_format = FMT_MJPEG;
1152
        s->intra_only = 1; /* force intra only for jpeg */
1153
        s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1154
        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1155
        s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1156
        s->mjpeg_vsample[1] = 1;
1157
        s->mjpeg_vsample[2] = 1;
1158
        s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1159
        s->mjpeg_hsample[1] = 1;
1160
        s->mjpeg_hsample[2] = 1;
1161
        if (mjpeg_init(s) < 0)
1162
            return -1;
1163
        avctx->delay=0;
1164
        s->low_delay=1;
1165
        break;
1166
    case CODEC_ID_H261:
1167
        s->out_format = FMT_H261;
1168
        avctx->delay=0;
1169
        s->low_delay=1;
1170
        break;
1171
    case CODEC_ID_H263:
1172
        if (h263_get_picture_format(s->width, s->height) == 7) {
1173
            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1174
            return -1;
1175
        }
1176
        s->out_format = FMT_H263;
1177
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1178
        avctx->delay=0;
1179
        s->low_delay=1;
1180
        break;
1181
    case CODEC_ID_H263P:
1182
        s->out_format = FMT_H263;
1183
        s->h263_plus = 1;
1184
        /* Fx */
1185
        s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1186
        s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1187
        s->modified_quant= s->h263_aic;
1188
        s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1189
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1190
        s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1191
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1192
        s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1193

    
1194
        /* /Fx */
1195
        /* These are just to be sure */
1196
        avctx->delay=0;
1197
        s->low_delay=1;
1198
        break;
1199
    case CODEC_ID_FLV1:
1200
        s->out_format = FMT_H263;
1201
        s->h263_flv = 2; /* format = 1; 11-bit codes */
1202
        s->unrestricted_mv = 1;
1203
        s->rtp_mode=0; /* don't allow GOB */
1204
        avctx->delay=0;
1205
        s->low_delay=1;
1206
        break;
1207
    case CODEC_ID_RV10:
1208
        s->out_format = FMT_H263;
1209
        avctx->delay=0;
1210
        s->low_delay=1;
1211
        break;
1212
    case CODEC_ID_RV20:
1213
        s->out_format = FMT_H263;
1214
        avctx->delay=0;
1215
        s->low_delay=1;
1216
        s->modified_quant=1;
1217
        s->h263_aic=1;
1218
        s->h263_plus=1;
1219
        s->loop_filter=1;
1220
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1221
        break;
1222
    case CODEC_ID_MPEG4:
1223
        s->out_format = FMT_H263;
1224
        s->h263_pred = 1;
1225
        s->unrestricted_mv = 1;
1226
        s->low_delay= s->max_b_frames ? 0 : 1;
1227
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1228
        break;
1229
    case CODEC_ID_MSMPEG4V1:
1230
        s->out_format = FMT_H263;
1231
        s->h263_msmpeg4 = 1;
1232
        s->h263_pred = 1;
1233
        s->unrestricted_mv = 1;
1234
        s->msmpeg4_version= 1;
1235
        avctx->delay=0;
1236
        s->low_delay=1;
1237
        break;
1238
    case CODEC_ID_MSMPEG4V2:
1239
        s->out_format = FMT_H263;
1240
        s->h263_msmpeg4 = 1;
1241
        s->h263_pred = 1;
1242
        s->unrestricted_mv = 1;
1243
        s->msmpeg4_version= 2;
1244
        avctx->delay=0;
1245
        s->low_delay=1;
1246
        break;
1247
    case CODEC_ID_MSMPEG4V3:
1248
        s->out_format = FMT_H263;
1249
        s->h263_msmpeg4 = 1;
1250
        s->h263_pred = 1;
1251
        s->unrestricted_mv = 1;
1252
        s->msmpeg4_version= 3;
1253
        s->flipflop_rounding=1;
1254
        avctx->delay=0;
1255
        s->low_delay=1;
1256
        break;
1257
    case CODEC_ID_WMV1:
1258
        s->out_format = FMT_H263;
1259
        s->h263_msmpeg4 = 1;
1260
        s->h263_pred = 1;
1261
        s->unrestricted_mv = 1;
1262
        s->msmpeg4_version= 4;
1263
        s->flipflop_rounding=1;
1264
        avctx->delay=0;
1265
        s->low_delay=1;
1266
        break;
1267
    case CODEC_ID_WMV2:
1268
        s->out_format = FMT_H263;
1269
        s->h263_msmpeg4 = 1;
1270
        s->h263_pred = 1;
1271
        s->unrestricted_mv = 1;
1272
        s->msmpeg4_version= 5;
1273
        s->flipflop_rounding=1;
1274
        avctx->delay=0;
1275
        s->low_delay=1;
1276
        break;
1277
    default:
1278
        return -1;
1279
    }
1280

    
1281
    avctx->has_b_frames= !s->low_delay;
1282

    
1283
    s->encoding = 1;
1284

    
1285
    /* init */
1286
    if (MPV_common_init(s) < 0)
1287
        return -1;
1288

    
1289
    if(s->modified_quant)
1290
        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1291
    s->progressive_frame=
1292
    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1293
    s->quant_precision=5;
1294

    
1295
    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1296
    ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1297

    
1298
#ifdef CONFIG_H261_ENCODER
1299
    if (s->out_format == FMT_H261)
1300
        ff_h261_encode_init(s);
1301
#endif
1302
    if (s->out_format == FMT_H263)
1303
        h263_encode_init(s);
1304
    if(s->msmpeg4_version)
1305
        ff_msmpeg4_encode_init(s);
1306
    if (s->out_format == FMT_MPEG1)
1307
        ff_mpeg1_encode_init(s);
1308

    
1309
    /* init q matrix */
1310
    for(i=0;i<64;i++) {
1311
        int j= s->dsp.idct_permutation[i];
1312
        if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1313
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1314
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1315
        }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1316
            s->intra_matrix[j] =
1317
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1318
        }else
1319
        { /* mpeg1/2 */
1320
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1321
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1322
        }
1323
        if(s->avctx->intra_matrix)
1324
            s->intra_matrix[j] = s->avctx->intra_matrix[i];
1325
        if(s->avctx->inter_matrix)
1326
            s->inter_matrix[j] = s->avctx->inter_matrix[i];
1327
    }
1328

    
1329
    /* precompute matrix */
1330
    /* for mjpeg, we do include qscale in the matrix */
1331
    if (s->out_format != FMT_MJPEG) {
1332
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1333
                       s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1334
        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1335
                       s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1336
    }
1337

    
1338
    if(ff_rate_control_init(s) < 0)
1339
        return -1;
1340

    
1341
    return 0;
1342
}
1343

    
1344
/* Tear down the encoder: rate control, the shared context, any codec
 * specific state (mjpeg tables), and extradata. Always returns 0. */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    ff_rate_control_uninit(s);

    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1362

    
1363
#endif //CONFIG_ENCODERS
1364

    
1365
/* Build the derived lookup tables (max_level, max_run, index_run) of a
 * run/level VLC table, for both the "not last coefficient" (index 0) and
 * "last coefficient" (index 1) halves. With use_static set, the tables are
 * allocated once from static storage and reused on later calls. */
void init_rl(RLTable *rl, int use_static)
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* static tables are shared between contexts: nothing to do if a
       previous call already filled them in */
    if (use_static && rl->max_level[0])
        return;

    for (last = 0; last < 2; last++) {
        /* entries [0, rl->last) are "not last", [rl->last, rl->n) are "last" */
        if (last == 0) {
            start = 0;
            end = rl->last;
        } else {
            start = rl->last;
            end = rl->n;
        }

        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run, 0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1); /* rl->n marks "no entry yet" */
        for (i = start; i < end; i++) {
            run   = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n) /* first entry for this run wins */
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }
        /* copy the stack scratch tables into (static) heap storage */
        rl->max_level[last] = use_static ? av_mallocz_static(MAX_RUN + 1)
                                         : av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
        rl->max_run[last] = use_static ? av_mallocz_static(MAX_LEVEL + 1)
                                       : av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
        rl->index_run[last] = use_static ? av_mallocz_static(MAX_RUN + 1)
                                         : av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
1415

    
1416
/* draw the edges of width 'w' of an image of size width, height */
1417
//FIXME check that this is ok for mpeg4 interlaced
1418
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
1419
{
1420
    uint8_t *ptr, *last_line;
1421
    int i;
1422

    
1423
    last_line = buf + (height - 1) * wrap;
1424
    for(i=0;i<w;i++) {
1425
        /* top and bottom */
1426
        memcpy(buf - (i + 1) * wrap, buf, width);
1427
        memcpy(last_line + (i + 1) * wrap, last_line, width);
1428
    }
1429
    /* left and right */
1430
    ptr = buf;
1431
    for(i=0;i<height;i++) {
1432
        memset(ptr - w, ptr[0], w);
1433
        memset(ptr + width, ptr[width-1], w);
1434
        ptr += wrap;
1435
    }
1436
    /* corners */
1437
    for(i=0;i<w;i++) {
1438
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
1439
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
1440
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* top left */
1441
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* top right */
1442
    }
1443
}
1444

    
1445
/* Return the index of a free slot in s->picture[]. For shared buffers only
 * an empty, untyped slot qualifies; otherwise prefer an empty slot that
 * already has a buffer type, falling back to any empty slot. At least one
 * slot is always expected to be free (asserted). */
int ff_find_unused_picture(MpegEncContext *s, int shared)
{
    int i;

    if (shared) {
        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
            if (s->picture[i].data[0] == NULL && s->picture[i].type == 0)
                return i;
        }
    } else {
        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
            if (s->picture[i].data[0] == NULL && s->picture[i].type != 0)
                return i; //FIXME
        }
        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
            if (s->picture[i].data[0] == NULL)
                return i;
        }
    }

    assert(0);
    return -1;
}
1464

    
1465
/* Refresh the per-coefficient noise-reduction offsets from the running
 * DCT error statistics, separately for inter (0) and intra (1) blocks.
 * Statistics are periodically halved so they keep adapting. */
static void update_noise_reduction(MpegEncContext *s)
{
    int intra, i;

    for (intra = 0; intra < 2; intra++) {
        /* decay the accumulated error sums once the sample count is large */
        if (s->dct_count[intra] > (1 << 16)) {
            for (i = 0; i < 64; i++)
                s->dct_error_sum[intra][i] >>= 1;
            s->dct_count[intra] >>= 1;
        }

        for (i = 0; i < 64; i++)
            s->dct_offset[intra][i] = (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i] / 2) / (s->dct_error_sum[intra][i] + 1);
    }
}
1481

    
1482
/**
1483
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1484
 */
1485
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1486
{
1487
    int i;
1488
    AVFrame *pic;
1489
    s->mb_skipped = 0;
1490

    
1491
    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1492

    
1493
    /* mark&release old frames */
1494
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1495
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1496

    
1497
        /* release forgotten pictures */
1498
        /* if(mpeg124/h263) */
1499
        if(!s->encoding){
1500
            for(i=0; i<MAX_PICTURE_COUNT; i++){
1501
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1502
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1503
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1504
                }
1505
            }
1506
        }
1507
    }
1508
alloc:
1509
    if(!s->encoding){
1510
        /* release non reference frames */
1511
        for(i=0; i<MAX_PICTURE_COUNT; i++){
1512
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1513
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1514
            }
1515
        }
1516

    
1517
        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1518
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1519
        else{
1520
            i= ff_find_unused_picture(s, 0);
1521
            pic= (AVFrame*)&s->picture[i];
1522
        }
1523

    
1524
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1525
                        && !s->dropable ? 3 : 0;
1526

    
1527
        pic->coded_picture_number= s->coded_picture_number++;
1528

    
1529
        if( alloc_picture(s, (Picture*)pic, 0) < 0)
1530
            return -1;
1531

    
1532
        s->current_picture_ptr= (Picture*)pic;
1533
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1534
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1535
    }
1536

    
1537
    s->current_picture_ptr->pict_type= s->pict_type;
1538
//    if(s->flags && CODEC_FLAG_QSCALE)
1539
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1540
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1541

    
1542
    copy_picture(&s->current_picture, s->current_picture_ptr);
1543

    
1544
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1545
    if (s->pict_type != B_TYPE) {
1546
        s->last_picture_ptr= s->next_picture_ptr;
1547
        if(!s->dropable)
1548
            s->next_picture_ptr= s->current_picture_ptr;
1549
    }
1550
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1551
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1552
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1553
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1554
        s->pict_type, s->dropable);*/
1555

    
1556
    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1557
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1558

    
1559
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1560
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1561
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1562
        goto alloc;
1563
    }
1564

    
1565
    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1566

    
1567
    if(s->picture_structure!=PICT_FRAME){
1568
        int i;
1569
        for(i=0; i<4; i++){
1570
            if(s->picture_structure == PICT_BOTTOM_FIELD){
1571
                 s->current_picture.data[i] += s->current_picture.linesize[i];
1572
            }
1573
            s->current_picture.linesize[i] *= 2;
1574
            s->last_picture.linesize[i] *=2;
1575
            s->next_picture.linesize[i] *=2;
1576
        }
1577
    }
1578
  }
1579

    
1580
    s->hurry_up= s->avctx->hurry_up;
1581
    s->error_resilience= avctx->error_resilience;
1582

    
1583
    /* set dequantizer, we can't do it during init as it might change for mpeg4
1584
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1585
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1586
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1587
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1588
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1589
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1590
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1591
    }else{
1592
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1593
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1594
    }
1595

    
1596
    if(s->dct_error_sum){
1597
        assert(s->avctx->noise_reduction && s->encoding);
1598

    
1599
        update_noise_reduction(s);
1600
    }
1601

    
1602
#ifdef HAVE_XVMC
1603
    if(s->avctx->xvmc_acceleration)
1604
        return XVMC_field_start(s, avctx);
1605
#endif
1606
    return 0;
1607
}
1608

    
1609
/* generic function for encode/decode called after a frame has been coded/decoded */
1610
void MPV_frame_end(MpegEncContext *s)
1611
{
1612
    int i;
1613
    /* draw edge for correct motion prediction if outside */
1614
#ifdef HAVE_XVMC
1615
//just to make sure that all data is rendered.
1616
    if(s->avctx->xvmc_acceleration){
1617
        XVMC_field_end(s);
1618
    }else
1619
#endif
1620
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
1621
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
1622
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1623
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
1624
    }
1625
    emms_c();
1626

    
1627
    s->last_pict_type    = s->pict_type;
1628
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
1629
    if(s->pict_type!=B_TYPE){
1630
        s->last_non_b_pict_type= s->pict_type;
1631
    }
1632
#if 0
1633
        /* copy back current_picture variables */
1634
    for(i=0; i<MAX_PICTURE_COUNT; i++){
1635
        if(s->picture[i].data[0] == s->current_picture.data[0]){
1636
            s->picture[i]= s->current_picture;
1637
            break;
1638
        }
1639
    }
1640
    assert(i<MAX_PICTURE_COUNT);
1641
#endif
1642

    
1643
    if(s->encoding){
1644
        /* release non-reference frames */
1645
        for(i=0; i<MAX_PICTURE_COUNT; i++){
1646
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1647
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1648
            }
1649
        }
1650
    }
1651
    // clear copies, to avoid confusion
1652
#if 0
1653
    memset(&s->last_picture, 0, sizeof(Picture));
1654
    memset(&s->next_picture, 0, sizeof(Picture));
1655
    memset(&s->current_picture, 0, sizeof(Picture));
1656
#endif
1657
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
1658
}
1659

    
1660
/**
 * draws a line from (sx, sy) -> (ex, ey).
 * Endpoints are clipped into the image; antialiasing is done with
 * 16.16 fixed-point interpolation along the major axis.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the line (added onto the existing pixels)
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: step along x, interpolate y */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex; /* ex >= 1 here because |dx| > |dy| >= 0 */
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            /* split the color between the two vertically adjacent pixels */
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical: step along y, interpolate x */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0; /* degenerate: single point */
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
/**
 * draws an arrow from (ex, ey) -> (sx, sy).
 * The arrow head (two short strokes) is drawn at (sx, sy) when the
 * vector is long enough to be visible.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int dx,dy;

    /* loose clip so partially visible vectors are still drawn */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    dx= ex - sx;
    dy= ey - sy;

    if(dx*dx + dy*dy > 3*3){
        /* head strokes: the vector rotated by +-45 degrees, scaled to length 3 */
        int rx=  dx + dy;
        int ry= -dx + dy;
        int length= ff_sqrt((rx*rx + ry*ry)<<8);

        //FIXME subpixel accuracy
        rx= ROUNDED_DIV(rx*3<<4, length);
        ry= ROUNDED_DIV(ry*3<<4, length);

        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
/**
1743
 * prints debuging info for the given picture.
1744
 */
1745
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
1746

    
1747
    if(!pict || !pict->mb_type) return;
1748

    
1749
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
1750
        int x,y;
1751

    
1752
        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
1753
        switch (pict->pict_type) {
1754
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
1755
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
1756
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
1757
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
1758
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
1759
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
1760
        }
1761
        for(y=0; y<s->mb_height; y++){
1762
            for(x=0; x<s->mb_width; x++){
1763
                if(s->avctx->debug&FF_DEBUG_SKIP){
1764
                    int count= s->mbskip_table[x + y*s->mb_stride];
1765
                    if(count>9) count=9;
1766
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
1767
                }
1768
                if(s->avctx->debug&FF_DEBUG_QP){
1769
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
1770
                }
1771
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
1772
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
1773
                    //Type & MV direction
1774
                    if(IS_PCM(mb_type))
1775
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
1776
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
1777
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
1778
                    else if(IS_INTRA4x4(mb_type))
1779
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
1780
                    else if(IS_INTRA16x16(mb_type))
1781
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
1782
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
1783
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
1784
                    else if(IS_DIRECT(mb_type))
1785
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
1786
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
1787
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
1788
                    else if(IS_GMC(mb_type))
1789
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
1790
                    else if(IS_SKIP(mb_type))
1791
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
1792
                    else if(!USES_LIST(mb_type, 1))
1793
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
1794
                    else if(!USES_LIST(mb_type, 0))
1795
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
1796
                    else{
1797
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1798
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
1799
                    }
1800

    
1801
                    //segmentation
1802
                    if(IS_8X8(mb_type))
1803
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
1804
                    else if(IS_16X8(mb_type))
1805
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
1806
                    else if(IS_8X16(mb_type))
1807
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
1808
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
1809
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
1810
                    else
1811
                        av_log(s->avctx, AV_LOG_DEBUG, "?");
1812

    
1813

    
1814
                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
1815
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
1816
                    else
1817
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
1818
                }
1819
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
1820
            }
1821
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
1822
        }
1823
    }
1824

    
1825
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
1826
        const int shift= 1 + s->quarter_sample;
1827
        int mb_y;
1828
        uint8_t *ptr;
1829
        int i;
1830
        int h_chroma_shift, v_chroma_shift;
1831
        const int width = s->avctx->width;
1832
        const int height= s->avctx->height;
1833
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
1834
        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
1835
        s->low_delay=0; //needed to see the vectors without trashing the buffers
1836

    
1837
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
1838
        for(i=0; i<3; i++){
1839
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
1840
            pict->data[i]= s->visualization_buffer[i];
1841
        }
1842
        pict->type= FF_BUFFER_TYPE_COPY;
1843
        ptr= pict->data[0];
1844

    
1845
        for(mb_y=0; mb_y<s->mb_height; mb_y++){
1846
            int mb_x;
1847
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
1848
                const int mb_index= mb_x + mb_y*s->mb_stride;
1849
                if((s->avctx->debug_mv) && pict->motion_val){
1850
                  int type;
1851
                  for(type=0; type<3; type++){
1852
                    int direction = 0;
1853
                    switch (type) {
1854
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
1855
                                continue;
1856
                              direction = 0;
1857
                              break;
1858
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
1859
                                continue;
1860
                              direction = 0;
1861
                              break;
1862
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
1863
                                continue;
1864
                              direction = 1;
1865
                              break;
1866
                    }
1867
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
1868
                        continue;
1869

    
1870
                    if(IS_8X8(pict->mb_type[mb_index])){
1871
                      int i;
1872
                      for(i=0; i<4; i++){
1873
                        int sx= mb_x*16 + 4 + 8*(i&1);
1874
                        int sy= mb_y*16 + 4 + 8*(i>>1);
1875
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1876
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1877
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1878
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1879
                      }
1880
                    }else if(IS_16X8(pict->mb_type[mb_index])){
1881
                      int i;
1882
                      for(i=0; i<2; i++){
1883
                        int sx=mb_x*16 + 8;
1884
                        int sy=mb_y*16 + 4 + 8*i;
1885
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
1886
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
1887
                        int my=(pict->motion_val[direction][xy][1]>>shift);
1888

    
1889
                        if(IS_INTERLACED(pict->mb_type[mb_index]))
1890
                            my*=2;
1891

    
1892
                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1893
                      }
1894
                    }else if(IS_8X16(pict->mb_type[mb_index])){
1895
                      int i;
1896
                      for(i=0; i<2; i++){
1897
                        int sx=mb_x*16 + 4 + 8*i;
1898
                        int sy=mb_y*16 + 8;
1899
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
1900
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
1901
                        int my=(pict->motion_val[direction][xy][1]>>shift);
1902

    
1903
                        if(IS_INTERLACED(pict->mb_type[mb_index]))
1904
                            my*=2;
1905

    
1906
                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
1907
                      }
1908
                    }else{
1909
                      int sx= mb_x*16 + 8;
1910
                      int sy= mb_y*16 + 8;
1911
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
1912
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
1913
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
1914
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
1915
                    }
1916
                  }
1917
                }
1918
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
1919
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
1920
                    int y;
1921
                    for(y=0; y<8; y++){
1922
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
1923
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
1924
                    }
1925
                }
1926
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
1927
                    int mb_type= pict->mb_type[mb_index];
1928
                    uint64_t u,v;
1929
                    int y;
1930
#define COLOR(theta, r)\
1931
u= (int)(128 + r*cos(theta*3.141592/180));\
1932
v= (int)(128 + r*sin(theta*3.141592/180));
1933

    
1934

    
1935
                    u=v=128;
1936
                    if(IS_PCM(mb_type)){
1937
                        COLOR(120,48)
1938
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
1939
                        COLOR(30,48)
1940
                    }else if(IS_INTRA4x4(mb_type)){
1941
                        COLOR(90,48)
1942
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
1943
//                        COLOR(120,48)
1944
                    }else if(IS_DIRECT(mb_type)){
1945
                        COLOR(150,48)
1946
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
1947
                        COLOR(170,48)
1948
                    }else if(IS_GMC(mb_type)){
1949
                        COLOR(190,48)
1950
                    }else if(IS_SKIP(mb_type)){
1951
//                        COLOR(180,48)
1952
                    }else if(!USES_LIST(mb_type, 1)){
1953
                        COLOR(240,48)
1954
                    }else if(!USES_LIST(mb_type, 0)){
1955
                        COLOR(0,48)
1956
                    }else{
1957
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
1958
                        COLOR(300,48)
1959
                    }
1960

    
1961
                    u*= 0x0101010101010101ULL;
1962
                    v*= 0x0101010101010101ULL;
1963
                    for(y=0; y<8; y++){
1964
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
1965
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
1966
                    }
1967

    
1968
                    //segmentation
1969
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
1970
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1971
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
1972
                    }
1973
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
1974
                        for(y=0; y<16; y++)
1975
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
1976
                    }
1977
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
1978
                        int dm= 1 << (mv_sample_log2-2);
1979
                        for(i=0; i<4; i++){
1980
                            int sx= mb_x*16 + 8*(i&1);
1981
                            int sy= mb_y*16 + 8*(i>>1);
1982
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
1983
                            //FIXME bidir
1984
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
1985
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
1986
                                for(y=0; y<8; y++)
1987
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
1988
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
1989
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
1990
                        }
1991
                    }
1992

    
1993
                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
1994
                        // hmm
1995
                    }
1996
                }
1997
                s->mbskip_table[mb_index]=0;
1998
            }
1999
        }
2000
    }
2001
}
2002

    
2003
#ifdef CONFIG_ENCODERS
2004

    
2005
/**
 * Returns the sum of absolute differences between every pixel of a
 * 16x16 block and a constant reference value (used as a flatness
 * measure for intra/inter decisions).
 * @param src    top-left pixel of the 16x16 block
 * @param ref    reference value each pixel is compared against
 * @param stride line size of src
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int x,y;
    int acc=0;

    for(y=0; y<16; y++){
        for(x=0; x<16; x++){
            int d= src[x+y*stride] - ref;
            acc+= d < 0 ? -d : d;
        }
    }

    return acc;
}
/**
 * Counts how many 16x16 blocks of the picture look cheaper to code as
 * intra (flat, low SAE vs their own mean) than as inter (SAD vs the
 * reference picture). Used by b_frame_strategy 1.
 * @param src    current picture plane
 * @param ref    reference picture plane
 * @param stride line size of both planes
 * @return number of blocks favoring intra coding
 */
static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
    int x, y, w, h;
    int acc=0;

    /* only full 16x16 blocks are considered */
    w= s->width &~15;
    h= s->height&~15;

    for(y=0; y<h; y+=16){
        for(x=0; x<w; x+=16){
            int offset= x + y*stride;
            int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
            int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
            int sae = get_sae(src + offset, mean, stride);

            /* +500 bias: prefer inter unless intra is clearly cheaper */
            acc+= sae + 500 < sad;
        }
    }
    return acc;
}
/**
 * Takes one user-supplied input frame and appends it to the encoder's
 * input queue (s->input_picture). The frame is either referenced
 * directly (when strides match and input preservation allows it) or
 * copied into an internal buffer. Also validates/guesses the pts.
 * @param pic_arg input frame, or NULL to flush (only shifts the queue)
 * @return 0 on success, -1 on invalid (non-monotonic) timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly monotonic */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: continue from the last one, or fall back to
               the display picture number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct (zero-copy) use is only possible if the input may be kept
       around for the whole encoding delay and the strides match */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty: the caller already wrote into our buffer
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            /* copy all three planes, line by line if strides differ */
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
/**
 * Decides whether the picture p is similar enough to the reference to
 * be skipped entirely. An 8x8 comparison score is accumulated over all
 * three planes using the norm selected by avctx->frame_skip_exp
 * (0: max, 1: sum of abs, 2: sum of squares, 3: |v|^3, 4: v^4).
 * @return 1 if the frame should be skipped, 0 otherwise
 */
static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
    int x, y, plane;
    int score=0;
    int64_t score64=0;

    for(plane=0; plane<3; plane++){
        const int stride= p->linesize[plane];
        const int bw= plane ? 1 : 2; /* luma has twice the 8x8 blocks per mb */
        for(y=0; y<s->mb_height*bw; y++){
            for(x=0; x<s->mb_width*bw; x++){
                /* internal buffers carry an edge offset, shared ones do not */
                int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
                int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);

                switch(s->avctx->frame_skip_exp){
                    case 0: score= FFMAX(score, v); break;
                    case 1: score+= ABS(v);break;
                    case 2: score+= v*v;break;
                    case 3: score64+= ABS(v*v*(int64_t)v);break;
                    case 4: score64+= v*v*(int64_t)(v*v);break;
                }
            }
        }
    }

    if(score) score64= score;

    if(score64 < s->avctx->frame_skip_threshold)
        return 1;
    /* also skip when the cost is negligible relative to the lambda */
    if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
        return 1;
    return 0;
}
/**
 * Estimates the best number of B-frames to use before the next P-frame
 * (b_frame_strategy 2) by brute force: the lookahead pictures are
 * downscaled by avctx->brd_scale and encoded with every candidate
 * B-frame count; the count with the lowest rate+distortion wins.
 * @return best B-frame count, or -1 on error
 */
static int estimate_best_b_count(MpegEncContext *s){
    AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
    AVCodecContext *c= avcodec_alloc_context();
    AVFrame input[FF_MAX_B_FRAMES+2];
    const int scale= s->avctx->brd_scale;
    int i, j, out_size, p_lambda, b_lambda, lambda2;
    int outbuf_size= s->width * s->height; //FIXME
    uint8_t *outbuf= av_malloc(outbuf_size);
    ImgReSampleContext *resample;
    int64_t best_rd= INT64_MAX;
    int best_b_count= -1;

//    emms_c();
    p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
    b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
    if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
    lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;

    /* set up a small side-encoder mirroring the relevant settings */
    c->width = s->width >> scale;
    c->height= s->height>> scale;
    c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
    c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
    c->mb_decision= s->avctx->mb_decision;
    c->me_cmp= s->avctx->me_cmp;
    c->mb_cmp= s->avctx->mb_cmp;
    c->me_sub_cmp= s->avctx->me_sub_cmp;
    c->pix_fmt = PIX_FMT_YUV420P;
    c->time_base= s->avctx->time_base;
    c->max_b_frames= s->max_b_frames;

    if (avcodec_open(c, codec) < 0){
        /* don't leak the context and output buffer on failure */
        av_freep(&outbuf);
        av_freep(&c);
        return -1;
    }

    resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws

    /* downscale the lookahead pictures into private buffers */
    for(i=0; i<s->max_b_frames+2; i++){
        int ysize= c->width*c->height;
        int csize= (c->width/2)*(c->height/2);
        Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;

        if(pre_input_ptr){
            pre_input= *pre_input_ptr;

            /* internal input buffers carry an edge offset; only inspect
               pre_input when it was actually assigned above */
            if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
                pre_input.data[0]+=INPLACE_OFFSET;
                pre_input.data[1]+=INPLACE_OFFSET;
                pre_input.data[2]+=INPLACE_OFFSET;
            }
        }

        avcodec_get_frame_defaults(&input[i]);
        input[i].data[0]= av_malloc(ysize + 2*csize);
        input[i].data[1]= input[i].data[0] + ysize;
        input[i].data[2]= input[i].data[1] + csize;
        input[i].linesize[0]= c->width;
        input[i].linesize[1]=
        input[i].linesize[2]= c->width/2;

        if(!i || s->input_picture[i-1])
            img_resample(resample, (AVPicture*)&input[i],
                         (AVPicture*)&pre_input);
    }

    /* try every candidate B-frame count j */
    for(j=0; j<s->max_b_frames+1; j++){
        int64_t rd=0;

        if(!s->input_picture[j])
            break;

        c->error[0]= c->error[1]= c->error[2]= 0;

        /* fixed-quality I-frame as a common starting point */
        input[0].pict_type= I_TYPE;
        input[0].quality= 1 * FF_QP2LAMBDA;
        out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
//        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;

        for(i=0; i<s->max_b_frames+1; i++){
            int is_p= i % (j+1) == j || i==s->max_b_frames;

            input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
            input[i+1].quality= is_p ? p_lambda : b_lambda;
            out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        /* get the delayed frames */
        while(out_size){
            out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        rd += c->error[0] + c->error[1] + c->error[2];

        if(rd < best_rd){
            best_rd= rd;
            best_b_count= j;
        }
    }

    av_freep(&outbuf);
    avcodec_close(c);
    av_freep(&c);
    img_resample_close(resample);

    for(i=0; i<s->max_b_frames+2; i++){
        av_freep(&input[i].data[0]);
    }

    return best_b_count;
}

    
2285
static void select_input_picture(MpegEncContext *s){
2286
    int i;
2287

    
2288
    for(i=1; i<MAX_PICTURE_COUNT; i++)
2289
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2290
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2291

    
2292
    /* set next picture type & ordering */
2293
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2294
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2295
            s->reordered_input_picture[0]= s->input_picture[0];
2296
            s->reordered_input_picture[0]->pict_type= I_TYPE;
2297
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2298
        }else{
2299
            int b_frames;
2300

    
2301
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2302
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2303
                //FIXME check that te gop check above is +-1 correct
2304
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2305

    
2306
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2307
                        for(i=0; i<4; i++)
2308
                            s->input_picture[0]->data[i]= NULL;
2309
                        s->input_picture[0]->type= 0;
2310
                    }else{
2311
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2312
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2313

    
2314
                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2315
                    }
2316

    
2317
                    emms_c();
2318
                    ff_vbv_update(s, 0);
2319

    
2320
                    goto no_output_pic;
2321
                }
2322
            }
2323

    
2324
            if(s->flags&CODEC_FLAG_PASS2){
2325
                for(i=0; i<s->max_b_frames+1; i++){
2326
                    int pict_num= s->input_picture[0]->display_picture_number + i;
2327

    
2328
                    if(pict_num >= s->rc_context.num_entries)
2329
                        break;
2330
                    if(!s->input_picture[i]){
2331
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2332
                        break;
2333
                    }
2334

    
2335
                    s->input_picture[i]->pict_type=
2336
                        s->rc_context.entry[pict_num].new_pict_type;
2337
                }
2338
            }
2339

    
2340
            if(s->avctx->b_frame_strategy==0){
2341
                b_frames= s->max_b_frames;
2342
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
2343
            }else if(s->avctx->b_frame_strategy==1){
2344
                for(i=1; i<s->max_b_frames+1; i++){
2345
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2346
                        s->input_picture[i]->b_frame_score=
2347
                            get_intra_count(s, s->input_picture[i  ]->data[0],
2348
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
2349
                    }
2350
                }
2351
                for(i=0; i<s->max_b_frames+1; i++){
2352
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2353
                }
2354

    
2355
                b_frames= FFMAX(0, i-1);
2356

    
2357
                /* reset scores */
2358
                for(i=0; i<b_frames+1; i++){
2359
                    s->input_picture[i]->b_frame_score=0;
2360
                }
2361
            }else if(s->avctx->b_frame_strategy==2){
2362
                b_frames= estimate_best_b_count(s);
2363
            }else{
2364
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2365
                b_frames=0;
2366
            }
2367

    
2368
            emms_c();
2369
//static int b_count=0;
2370
//b_count+= b_frames;
2371
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2372

    
2373
            for(i= b_frames - 1; i>=0; i--){
2374
                int type= s->input_picture[i]->pict_type;
2375
                if(type && type != B_TYPE)
2376
                    b_frames= i;
2377
            }
2378
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2379
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2380
            }
2381

    
2382
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
2383
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2384
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
2385
              }else{
2386
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
2387
                    b_frames=0;
2388
                s->input_picture[b_frames]->pict_type= I_TYPE;
2389
              }
2390
            }
2391

    
2392
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2393
               && b_frames
2394
               && s->input_picture[b_frames]->pict_type== I_TYPE)
2395
                b_frames--;
2396

    
2397
            s->reordered_input_picture[0]= s->input_picture[b_frames];
2398
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2399
                s->reordered_input_picture[0]->pict_type= P_TYPE;
2400
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2401
            for(i=0; i<b_frames; i++){
2402
                s->reordered_input_picture[i+1]= s->input_picture[i];
2403
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2404
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2405
            }
2406
        }
2407
    }
2408
no_output_pic:
2409
    if(s->reordered_input_picture[0]){
2410
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2411

    
2412
        copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2413

    
2414
        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2415
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2416

    
2417
            int i= ff_find_unused_picture(s, 0);
2418
            Picture *pic= &s->picture[i];
2419

    
2420
            /* mark us unused / free shared pic */
2421
            for(i=0; i<4; i++)
2422
                s->reordered_input_picture[0]->data[i]= NULL;
2423
            s->reordered_input_picture[0]->type= 0;
2424

    
2425
            pic->reference              = s->reordered_input_picture[0]->reference;
2426

    
2427
            alloc_picture(s, pic, 0);
2428

    
2429
            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2430

    
2431
            s->current_picture_ptr= pic;
2432
        }else{
2433
            // input is not a shared pix -> reuse buffer for current_pix
2434

    
2435
            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2436
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2437

    
2438
            s->current_picture_ptr= s->reordered_input_picture[0];
2439
            for(i=0; i<4; i++){
2440
                s->new_picture.data[i]+= INPLACE_OFFSET;
2441
            }
2442
        }
2443
        copy_picture(&s->current_picture, s->current_picture_ptr);
2444

    
2445
        s->picture_number= s->new_picture.display_picture_number;
2446
//printf("dpn:%d\n", s->picture_number);
2447
    }else{
2448
       memset(&s->new_picture, 0, sizeof(Picture));
2449
    }
2450
}
2451

    
2452
/**
 * Top-level encoder entry point for one input frame.
 * Loads the input picture, selects/reorders the picture to code (B-frame
 * buffering), encodes it if one is due, adds VBV stuffing bits and patches
 * the MPEG-1/2 vbv_delay header field for CBR streams.
 *
 * @param avctx    codec context (priv_data is the MpegEncContext)
 * @param buf      output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data     the input AVFrame
 * @return number of bytes written (0 while buffering B-frames), -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* split the output buffer between slice threads, proportional to the
       number of macroblock rows each thread will encode */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* decide which buffered picture (if any) is coded by this call */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame statistics through the public AVCodecContext fields */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        /* copy the per-plane error stats and accumulate them globally */
        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* rate control may request stuffing bytes to avoid VBV overflow */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            /* +50 bytes of headroom must remain after stuffing */
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: plain zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing uses the 0x000001C3 start code followed by
                   0xFF bytes; the 4 start-code bytes count toward the total */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            /* vbv_delay is expressed in 90 kHz clock ticks */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay field in the already written
               picture header (vbv_delay_ptr points into the bitstream) */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* nothing to emit this call: the input is being buffered for B-frames */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2573

    
2574
#endif //CONFIG_ENCODERS
2575

    
2576
/**
 * Global motion compensation, fast single-vector case ("gmc1"):
 * one translational sprite offset applies to the whole macroblock.
 * Renders the 16x16 luma block and, unless CODEC_FLAG_GRAY is set,
 * the two 8x8 chroma blocks into dest_y/dest_cb/dest_cr.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* integer source position: offset scaled down by the warping accuracy */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale so the fraction used below (motion_&15) is in 1/16-pel units */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;                    /* clamped at the right edge: no fraction */
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;                    /* clamped at the bottom edge: no fraction */

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    /* replicate edge pixels when the 17x17 source area leaves the picture */
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* true fractional offset: use the gmc1 interpolator on both 8-wide halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* offset reduces to half-pel: the cheaper put_pixels path suffices */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same procedure at half resolution with the chroma sprite offset */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;                      /* Cr shares the same offset: remember the decision */
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2661

    
2662
/**
 * Global motion compensation with a full affine warp (2x2 delta matrix) —
 * the general case, as opposed to gmc1_motion's single-vector fast path.
 * Renders the 16x16 luma block as two 8x16 halves and, unless
 * CODEC_FLAG_GRAY is set, one 8x8 block per chroma plane.
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    const int a       = s->sprite_warping_accuracy;
    const int rounder = (1 << (2*a + 1)) - s->no_rounding;
    const int dxx     = s->sprite_delta[0][0];
    const int dxy     = s->sprite_delta[0][1];
    const int dyx     = s->sprite_delta[1][0];
    const int dyy     = s->sprite_delta[1][1];
    const int linesize   = s->linesize;
    const int uvlinesize = s->uvlinesize;
    int ox, oy;

    /* luma: warp origin for this macroblock's top-left corner */
    ox = s->sprite_offset[0][0] + dxx*s->mb_x*16 + dxy*s->mb_y*16;
    oy = s->sprite_offset[0][1] + dyx*s->mb_x*16 + dyy*s->mb_y*16;

    s->dsp.gmc(dest_y, ref_picture[0], linesize, 16,
               ox,
               oy,
               dxx, dxy,
               dyx, dyy,
               a+1, rounder,
               s->h_edge_pos, s->v_edge_pos);
    /* right 8x16 half: origin advanced by 8 columns of the warp */
    s->dsp.gmc(dest_y+8, ref_picture[0], linesize, 16,
               ox + dxx*8,
               oy + dyx*8,
               dxx, dxy,
               dyx, dyy,
               a+1, rounder,
               s->h_edge_pos, s->v_edge_pos);

    if (s->flags & CODEC_FLAG_GRAY)
        return;

    /* chroma: half-resolution warp origin, half-size edge limits */
    ox = s->sprite_offset[1][0] + dxx*s->mb_x*8 + dxy*s->mb_y*8;
    oy = s->sprite_offset[1][1] + dyx*s->mb_x*8 + dyy*s->mb_y*8;

    s->dsp.gmc(dest_cb, ref_picture[1], uvlinesize, 8,
               ox,
               oy,
               dxx, dxy,
               dyx, dyy,
               a+1, rounder,
               s->h_edge_pos>>1, s->v_edge_pos>>1);

    s->dsp.gmc(dest_cr, ref_picture[2], uvlinesize, 8,
               ox,
               oy,
               dxx, dxy,
               dyx, dyy,
               a+1, rounder,
               s->h_edge_pos>>1, s->v_edge_pos>>1);
}
2717

    
2718
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates the border samples.
 * @param buf destination buffer
 * @param src source buffer, already pointing at the block's top-left sample
 *            position (i.e. the caller added src_y*linesize + src_x)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int row, col;
    int fill_top, fill_left, copy_bottom, copy_right;

    /* move a fully outside origin back so the block touches the picture by
       at least one row/column (keeps the copy region below non-empty) */
    if(src_y >= h){
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    }else if(src_y <= -block_h){
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if(src_x >= w){
        src   += w - 1 - src_x;
        src_x  = w - 1;
    }else if(src_x <= -block_w){
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    /* region of the block that overlaps the picture:
       rows [fill_top, copy_bottom), columns [fill_left, copy_right) */
    fill_top    = -src_y > 0 ? -src_y : 0;
    fill_left   = -src_x > 0 ? -src_x : 0;
    copy_bottom = h - src_y < block_h ? h - src_y : block_h;
    copy_right  = w - src_x < block_w ? w - src_x : block_w;

    /* copy the existing part */
    for(row = fill_top; row < copy_bottom; row++)
        for(col = fill_left; col < copy_right; col++)
            buf[col + row*linesize] = src[col + row*linesize];

    /* replicate the first copied row upwards */
    for(row = 0; row < fill_top; row++)
        for(col = fill_left; col < copy_right; col++)
            buf[col + row*linesize] = buf[col + fill_top*linesize];

    /* replicate the last copied row downwards */
    for(row = copy_bottom; row < block_h; row++)
        for(col = fill_left; col < copy_right; col++)
            buf[col + row*linesize] = buf[col + (copy_bottom - 1)*linesize];

    /* replicate the leftmost/rightmost filled columns sideways */
    for(row = 0; row < block_h; row++){
        for(col = 0; col < fill_left; col++)
            buf[col + row*linesize] = buf[fill_left + row*linesize];
        for(col = copy_right; col < block_w; col++)
            buf[col + row*linesize] = buf[copy_right - 1 + row*linesize];
    }
}
2788

    
2789
/**
 * Half-pel motion compensation for one w x h block.
 * Clips the source position to the picture, falls back to the
 * edge-emulation buffer for unrestricted MVs that leave the padded area,
 * and applies the selected put/avg operator.
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int used_emu = 0;
    int dxy = ((motion_y & 1) << 1) | (motion_x & 1);   /* half-pel operator index */

    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do no forget half pels */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    src_y = clip(src_y, -16, height);
    if (src_x == width)
        dxy &= ~1;                  /* clamped at the right edge: drop horizontal half-pel */
    if (src_y == height)
        dxy &= ~2;                  /* clamped at the bottom edge: drop vertical half-pel */
    src += src_y * stride + src_x;

    if ((s->flags & CODEC_FLAG_EMU_EDGE) && s->unrestricted_mv
        && (   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h)) {
        /* NOTE(review): the call passes the full-frame s->v_edge_pos while the
           threshold above uses the v_edge_pos parameter; src_y is rescaled by
           <<field_based, which looks intentional — confirm for field-based use */
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
        src = s->edge_emu_buffer;
        used_emu = 1;
    }
    if (field_select)
        src += s->linesize;
    pix_op[dxy](dest, src, stride, h);
    return used_emu;
}
2828

    
2829
/**
 * Half-pel motion compensation for one block in lowres decoding.
 * The sub-pel fraction is kept at (lowres+1)-bit precision and handed to
 * the h264 chroma MC operator, which does the bilinear interpolation.
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres = s->avctx->lowres;
    const int s_mask = (2 << lowres) - 1;   /* sub-pel fraction mask at this lowres level */
    int frac_x, frac_y;
    int used_emu = 0;

    if (s->quarter_sample) {
        /* fold qpel vectors down to hpel precision */
        motion_x /= 2;
        motion_y /= 2;
    }

    frac_x = motion_x & s_mask;
    frac_y = motion_y & s_mask;
    src_x += motion_x >> (lowres + 1);
    src_y += motion_y >> (lowres + 1);

    src += src_y * stride + src_x;

    if (   (unsigned)src_x > h_edge_pos                  - (!!frac_x) - w
        || (unsigned)src_y > (v_edge_pos >> field_based) - (!!frac_y) - h) {
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src = s->edge_emu_buffer;
        used_emu = 1;
    }

    /* normalize the fraction to the 1/8-pel scale pix_op expects */
    frac_x <<= 2 - lowres;
    frac_y <<= 2 - lowres;
    if (field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, frac_x, frac_y);
    return used_emu;
}
2870

    
2871
/* apply one mpeg motion vector to the three components */
/**
 * Renders one motion-compensated block (luma + both chroma planes unless
 * CODEC_FLAG_GRAY) using half-pel precision. Handles frame- and field-based
 * prediction and the chroma subsampling variants of the supported formats.
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    /* field-based prediction works on interleaved fields: double the stride
       and halve the vertical range */
    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    dxy = ((motion_y & 1) << 1) | (motion_x & 1);   /* luma half-pel operator index */
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma source position and half-pel operator per format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: chroma MV rounded like hpel luma */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0 */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source area leaves the padded picture: emulate the edge (or bail out
       entirely for MPEG-1/2, where such vectors are invalid) */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                /* chroma goes after the 18-row luma area of the emu buffer */
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        /* predict from the bottom field: skip one (frame-interleaved) line */
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2987

    
2988
/* apply one mpeg motion vector to the three components */
/**
 * Lowres variant of mpeg_motion: renders one motion-compensated block at
 * reduced resolution (1 << lowres downscale) using the h264 chroma MC
 * operators for the sub-pel interpolation.
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;           /* block size at this lowres level */
    const int s_mask= (2<<lowres)-1;        /* sub-pel fraction mask */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* chroma source position and fraction per output format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        /* 4:2:0 style halved chroma vector */
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source area leaves the padded picture: replicate edges into the
       emulation buffer and predict from there */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                /* chroma goes after the 18-row luma area of the emu buffer */
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* normalize fractions to the 1/8-pel scale pix_op expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3084

    
3085
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Overlapped block motion compensation blend for one 8x8 block.
 * Each output pixel is a weighted sum of the five candidate predictions
 * (mid, top, left, right, bottom) with 3-bit fixed-point weights that sum
 * to 8 per pixel; the result is rounded (+4) and shifted down by 3.
 * The weight table below is position dependent: pixels near an edge give
 * more weight to the neighbour on that side.
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* blend one pixel: weights t,l,m,r,b for top/left/mid/right/bottom */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* blend a 2x2 group (pixel, right neighbour, and the pair one row below) */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    x=0;
    /* row 0 */
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    /* row 1: columns 2..5 were already written by the FILTER4s above */
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    /* rows 2-3 */
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    /* rows 4-5 */
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    /* row 6 (the FILTER4s also pre-write columns 2..5 of row 7) */
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    /* row 7 */
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
}
3136

    
3137
/* obmc for 1 8x8 luma block */
3138
static inline void obmc_motion(MpegEncContext *s,
3139
                               uint8_t *dest, uint8_t *src,
3140
                               int src_x, int src_y,
3141
                               op_pixels_func *pix_op,
3142
                               int16_t mv[5][2]/* mid top left right bottom*/)
3143
#define MID    0
3144
{
3145
    int i;
3146
    uint8_t *ptr[5];
3147

    
3148
    assert(s->quarter_sample==0);
3149

    
3150
    for(i=0; i<5; i++){
3151
        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3152
            ptr[i]= ptr[MID];
3153
        }else{
3154
            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3155
            hpel_motion(s, ptr[i], src, 0, 0,
3156
                        src_x, src_y,
3157
                        s->width, s->height, s->linesize,
3158
                        s->h_edge_pos, s->v_edge_pos,
3159
                        8, 8, pix_op,
3160
                        mv[i][0], mv[i][1]);
3161
        }
3162
    }
3163

    
3164
    put_obmc(dest, ptr, s->linesize);
3165
}
3166

    
3167
static inline void qpel_motion(MpegEncContext *s,
3168
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3169
                               int field_based, int bottom_field, int field_select,
3170
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
3171
                               qpel_mc_func (*qpix_op)[16],
3172
                               int motion_x, int motion_y, int h)
3173
{
3174
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
3175
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;
3176

    
3177
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3178
    src_x = s->mb_x *  16                 + (motion_x >> 2);
3179
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);
3180

    
3181
    v_edge_pos = s->v_edge_pos >> field_based;
3182
    linesize = s->linesize << field_based;
3183
    uvlinesize = s->uvlinesize << field_based;
3184

    
3185
    if(field_based){
3186
        mx= motion_x/2;
3187
        my= motion_y>>1;
3188
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
3189
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
3190
        mx= (motion_x>>1) + rtab[motion_x&7];
3191
        my= (motion_y>>1) + rtab[motion_y&7];
3192
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
3193
        mx= (motion_x>>1)|(motion_x&1);
3194
        my= (motion_y>>1)|(motion_y&1);
3195
    }else{
3196
        mx= motion_x/2;
3197
        my= motion_y/2;
3198
    }
3199
    mx= (mx>>1)|(mx&1);
3200
    my= (my>>1)|(my&1);
3201

    
3202
    uvdxy= (mx&1) | ((my&1)<<1);
3203
    mx>>=1;
3204
    my>>=1;
3205

    
3206
    uvsrc_x = s->mb_x *  8                 + mx;
3207
    uvsrc_y = s->mb_y * (8 >> field_based) + my;
3208

    
3209
    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
3210
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
3211
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
3212

    
3213
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
3214
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
3215
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
3216
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
3217
        ptr_y= s->edge_emu_buffer;
3218
        if(!(s->flags&CODEC_FLAG_GRAY)){
3219
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
3220
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
3221
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3222
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
3223
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
3224
            ptr_cb= uvbuf;
3225
            ptr_cr= uvbuf + 16;
3226
        }
3227
    }
3228

    
3229
    if(!field_based)
3230
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
3231
    else{
3232
        if(bottom_field){
3233
            dest_y += s->linesize;
3234
            dest_cb+= s->uvlinesize;
3235
            dest_cr+= s->uvlinesize;
3236
        }
3237

    
3238
        if(field_select){
3239
            ptr_y  += s->linesize;
3240
            ptr_cb += s->uvlinesize;
3241
            ptr_cr += s->uvlinesize;
3242
        }
3243
        //damn interlaced mode
3244
        //FIXME boundary mirroring is not exactly correct here
3245
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
3246
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
3247
    }
3248
    if(!(s->flags&CODEC_FLAG_GRAY)){
3249
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
3250
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
3251
    }
3252
}
3253

    
3254
inline int ff_h263_round_chroma(int x){
3255
    if (x >= 0)
3256
        return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3257
    else {
3258
        x = -x;
3259
        return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3260
    }
3261
}
3262

    
3263
/**
3264
 * h263 chorma 4mv motion compensation.
3265
 */
3266
static inline void chroma_4mv_motion(MpegEncContext *s,
3267
                                     uint8_t *dest_cb, uint8_t *dest_cr,
3268
                                     uint8_t **ref_picture,
3269
                                     op_pixels_func *pix_op,
3270
                                     int mx, int my){
3271
    int dxy, emu=0, src_x, src_y, offset;
3272
    uint8_t *ptr;
3273

    
3274
    /* In case of 8X8, we construct a single chroma motion vector
3275
       with a special rounding */
3276
    mx= ff_h263_round_chroma(mx);
3277
    my= ff_h263_round_chroma(my);
3278

    
3279
    dxy = ((my & 1) << 1) | (mx & 1);
3280
    mx >>= 1;
3281
    my >>= 1;
3282

    
3283
    src_x = s->mb_x * 8 + mx;
3284
    src_y = s->mb_y * 8 + my;
3285
    src_x = clip(src_x, -8, s->width/2);
3286
    if (src_x == s->width/2)
3287
        dxy &= ~1;
3288
    src_y = clip(src_y, -8, s->height/2);
3289
    if (src_y == s->height/2)
3290
        dxy &= ~2;
3291

    
3292
    offset = (src_y * (s->uvlinesize)) + src_x;
3293
    ptr = ref_picture[1] + offset;
3294
    if(s->flags&CODEC_FLAG_EMU_EDGE){
3295
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
3296
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
3297
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3298
            ptr= s->edge_emu_buffer;
3299
            emu=1;
3300
        }
3301
    }
3302
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
3303

    
3304
    ptr = ref_picture[2] + offset;
3305
    if(emu){
3306
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
3307
        ptr= s->edge_emu_buffer;
3308
    }
3309
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
3310
}
3311

    
3312
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
3313
                                     uint8_t *dest_cb, uint8_t *dest_cr,
3314
                                     uint8_t **ref_picture,
3315
                                     h264_chroma_mc_func *pix_op,
3316
                                     int mx, int my){
3317
    const int lowres= s->avctx->lowres;
3318
    const int block_s= 8>>lowres;
3319
    const int s_mask= (2<<lowres)-1;
3320
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
3321
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
3322
    int emu=0, src_x, src_y, offset, sx, sy;
3323
    uint8_t *ptr;
3324

    
3325
    if(s->quarter_sample){
3326
        mx/=2;
3327
        my/=2;
3328
    }
3329

    
3330
    /* In case of 8X8, we construct a single chroma motion vector
3331
       with a special rounding */
3332
    mx= ff_h263_round_chroma(mx);
3333
    my= ff_h263_round_chroma(my);
3334

    
3335
    sx= mx & s_mask;
3336
    sy= my & s_mask;
3337
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
3338
    src_y = s->mb_y*block_s + (my >> (lowres+1));
3339

    
3340
    offset = src_y * s->uvlinesize + src_x;
3341
    ptr = ref_picture[1] + offset;
3342
    if(s->flags&CODEC_FLAG_EMU_EDGE){
3343
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
3344
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
3345
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3346
            ptr= s->edge_emu_buffer;
3347
            emu=1;
3348
        }
3349
    }
3350
    sx <<= 2 - lowres;
3351
    sy <<= 2 - lowres;
3352
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
3353

    
3354
    ptr = ref_picture[2] + offset;
3355
    if(emu){
3356
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
3357
        ptr= s->edge_emu_buffer;
3358
    }
3359
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3360
}
3361

    
3362
/**
3363
 * motion compensation of a single macroblock
3364
 * @param s context
3365
 * @param dest_y luma destination pointer
3366
 * @param dest_cb chroma cb/u destination pointer
3367
 * @param dest_cr chroma cr/v destination pointer
3368
 * @param dir direction (0->forward, 1->backward)
3369
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3370
 * @param pic_op halfpel motion compensation function (average or put normally)
3371
 * @param pic_op qpel motion compensation function (average or put normally)
3372
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
3373
 */
3374
static inline void MPV_motion(MpegEncContext *s,
3375
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3376
                              int dir, uint8_t **ref_picture,
3377
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
3378
{
3379
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
3380
    int mb_x, mb_y, i;
3381
    uint8_t *ptr, *dest;
3382

    
3383
    mb_x = s->mb_x;
3384
    mb_y = s->mb_y;
3385

    
3386
    if(s->obmc && s->pict_type != B_TYPE){
3387
        int16_t mv_cache[4][4][2];
3388
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
3389
        const int mot_stride= s->b8_stride;
3390
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
3391

    
3392
        assert(!s->mb_skipped);
3393

    
3394
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
3395
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3396
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
3397

    
3398
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
3399
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
3400
        }else{
3401
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
3402
        }
3403

    
3404
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
3405
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
3406
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
3407
        }else{
3408
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
3409
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
3410
        }
3411

    
3412
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
3413
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
3414
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
3415
        }else{
3416
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
3417
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
3418
        }
3419

    
3420
        mx = 0;
3421
        my = 0;
3422
        for(i=0;i<4;i++) {
3423
            const int x= (i&1)+1;
3424
            const int y= (i>>1)+1;
3425
            int16_t mv[5][2]= {
3426
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
3427
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
3428
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
3429
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
3430
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
3431
            //FIXME cleanup
3432
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3433
                        ref_picture[0],
3434
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3435
                        pix_op[1],
3436
                        mv);
3437

    
3438
            mx += mv[0][0];
3439
            my += mv[0][1];
3440
        }
3441
        if(!(s->flags&CODEC_FLAG_GRAY))
3442
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3443

    
3444
        return;
3445
    }
3446

    
3447
    switch(s->mv_type) {
3448
    case MV_TYPE_16X16:
3449
        if(s->mcsel){
3450
            if(s->real_sprite_warping_points==1){
3451
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
3452
                            ref_picture);
3453
            }else{
3454
                gmc_motion(s, dest_y, dest_cb, dest_cr,
3455
                            ref_picture);
3456
            }
3457
        }else if(s->quarter_sample){
3458
            qpel_motion(s, dest_y, dest_cb, dest_cr,
3459
                        0, 0, 0,
3460
                        ref_picture, pix_op, qpix_op,
3461
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
3462
        }else if(s->mspel){
3463
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
3464
                        ref_picture, pix_op,
3465
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
3466
        }else
3467
        {
3468
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
3469
                        0, 0, 0,
3470
                        ref_picture, pix_op,
3471
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
3472
        }
3473
        break;
3474
    case MV_TYPE_8X8:
3475
        mx = 0;
3476
        my = 0;
3477
        if(s->quarter_sample){
3478
            for(i=0;i<4;i++) {
3479
                motion_x = s->mv[dir][i][0];
3480
                motion_y = s->mv[dir][i][1];
3481

    
3482
                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
3483
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
3484
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
3485

    
3486
                /* WARNING: do no forget half pels */
3487
                src_x = clip(src_x, -16, s->width);
3488
                if (src_x == s->width)
3489
                    dxy &= ~3;
3490
                src_y = clip(src_y, -16, s->height);
3491
                if (src_y == s->height)
3492
                    dxy &= ~12;
3493

    
3494
                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
3495
                if(s->flags&CODEC_FLAG_EMU_EDGE){
3496
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
3497
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
3498
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
3499
                        ptr= s->edge_emu_buffer;
3500
                    }
3501
                }
3502
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
3503
                qpix_op[1][dxy](dest, ptr, s->linesize);
3504

    
3505
                mx += s->mv[dir][i][0]/2;
3506
                my += s->mv[dir][i][1]/2;
3507
            }
3508
        }else{
3509
            for(i=0;i<4;i++) {
3510
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
3511
                            ref_picture[0], 0, 0,
3512
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
3513
                            s->width, s->height, s->linesize,
3514
                            s->h_edge_pos, s->v_edge_pos,
3515
                            8, 8, pix_op[1],
3516
                            s->mv[dir][i][0], s->mv[dir][i][1]);
3517

    
3518
                mx += s->mv[dir][i][0];
3519
                my += s->mv[dir][i][1];
3520
            }
3521
        }
3522

    
3523
        if(!(s->flags&CODEC_FLAG_GRAY))
3524
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
3525
        break;
3526
    case MV_TYPE_FIELD:
3527
        if (s->picture_structure == PICT_FRAME) {
3528
            if(s->quarter_sample){
3529
                for(i=0; i<2; i++){
3530
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
3531
                                1, i, s->field_select[dir][i],
3532
                                ref_picture, pix_op, qpix_op,
3533
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
3534
                }
3535
            }else{
3536
                /* top field */
3537
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
3538
                            1, 0, s->field_select[dir][0],
3539
                            ref_picture, pix_op,
3540
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
3541
                /* bottom field */
3542
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
3543
                            1, 1, s->field_select[dir][1],
3544
                            ref_picture, pix_op,
3545
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
3546
            }
3547
        } else {
3548
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3549
                ref_picture= s->current_picture_ptr->data;
3550
            }
3551

    
3552
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
3553
                        0, 0, s->field_select[dir][0],
3554
                        ref_picture, pix_op,
3555
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
3556
        }
3557
        break;
3558
    case MV_TYPE_16X8:
3559
        for(i=0; i<2; i++){
3560
            uint8_t ** ref2picture;
3561

    
3562
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3563
                ref2picture= ref_picture;
3564
            }else{
3565
                ref2picture= s->current_picture_ptr->data;
3566
            }
3567

    
3568
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
3569
                        0, 0, s->field_select[dir][i],
3570
                        ref2picture, pix_op,
3571
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
3572

    
3573
            dest_y += 16*s->linesize;
3574
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
3575
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
3576
        }
3577
        break;
3578
    case MV_TYPE_DMV:
3579
        if(s->picture_structure == PICT_FRAME){
3580
            for(i=0; i<2; i++){
3581
                int j;
3582
                for(j=0; j<2; j++){
3583
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
3584
                                1, j, j^i,
3585
                                ref_picture, pix_op,
3586
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
3587
                }
3588
                pix_op = s->dsp.avg_pixels_tab;
3589
            }
3590
        }else{
3591
            for(i=0; i<2; i++){
3592
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
3593
                            0, 0, s->picture_structure != i+1,
3594
                            ref_picture, pix_op,
3595
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
3596

    
3597
                // after put we make avg of the same block
3598
                pix_op=s->dsp.avg_pixels_tab;
3599

    
3600
                //opposite parity is always in the same frame if this is second field
3601
                if(!s->first_field){
3602
                    ref_picture = s->current_picture_ptr->data;
3603
                }
3604
            }
3605
        }
3606
    break;
3607
    default: assert(0);
3608
    }
3609
}
3610

    
3611
/**
3612
 * motion compensation of a single macroblock
3613
 * @param s context
3614
 * @param dest_y luma destination pointer
3615
 * @param dest_cb chroma cb/u destination pointer
3616
 * @param dest_cr chroma cr/v destination pointer
3617
 * @param dir direction (0->forward, 1->backward)
3618
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
3619
 * @param pic_op halfpel motion compensation function (average or put normally)
3620
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
3621
 */
3622
static inline void MPV_motion_lowres(MpegEncContext *s,
3623
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
3624
                              int dir, uint8_t **ref_picture,
3625
                              h264_chroma_mc_func *pix_op)
3626
{
3627
    int mx, my;
3628
    int mb_x, mb_y, i;
3629
    const int lowres= s->avctx->lowres;
3630
    const int block_s= 8>>lowres;
3631

    
3632
    mb_x = s->mb_x;
3633
    mb_y = s->mb_y;
3634

    
3635
    switch(s->mv_type) {
3636
    case MV_TYPE_16X16:
3637
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3638
                    0, 0, 0,
3639
                    ref_picture, pix_op,
3640
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3641
        break;
3642
    case MV_TYPE_8X8:
3643
        mx = 0;
3644
        my = 0;
3645
            for(i=0;i<4;i++) {
3646
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
3647
                            ref_picture[0], 0, 0,
3648
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
3649
                            s->width, s->height, s->linesize,
3650
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
3651
                            block_s, block_s, pix_op,
3652
                            s->mv[dir][i][0], s->mv[dir][i][1]);
3653

    
3654
                mx += s->mv[dir][i][0];
3655
                my += s->mv[dir][i][1];
3656
            }
3657

    
3658
        if(!(s->flags&CODEC_FLAG_GRAY))
3659
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
3660
        break;
3661
    case MV_TYPE_FIELD:
3662
        if (s->picture_structure == PICT_FRAME) {
3663
            /* top field */
3664
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3665
                        1, 0, s->field_select[dir][0],
3666
                        ref_picture, pix_op,
3667
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
3668
            /* bottom field */
3669
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3670
                        1, 1, s->field_select[dir][1],
3671
                        ref_picture, pix_op,
3672
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
3673
        } else {
3674
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
3675
                ref_picture= s->current_picture_ptr->data;
3676
            }
3677

    
3678
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3679
                        0, 0, s->field_select[dir][0],
3680
                        ref_picture, pix_op,
3681
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
3682
        }
3683
        break;
3684
    case MV_TYPE_16X8:
3685
        for(i=0; i<2; i++){
3686
            uint8_t ** ref2picture;
3687

    
3688
            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
3689
                ref2picture= ref_picture;
3690
            }else{
3691
                ref2picture= s->current_picture_ptr->data;
3692
            }
3693

    
3694
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3695
                        0, 0, s->field_select[dir][i],
3696
                        ref2picture, pix_op,
3697
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
3698

    
3699
            dest_y += 2*block_s*s->linesize;
3700
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3701
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
3702
        }
3703
        break;
3704
    case MV_TYPE_DMV:
3705
        if(s->picture_structure == PICT_FRAME){
3706
            for(i=0; i<2; i++){
3707
                int j;
3708
                for(j=0; j<2; j++){
3709
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3710
                                1, j, j^i,
3711
                                ref_picture, pix_op,
3712
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
3713
                }
3714
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3715
            }
3716
        }else{
3717
            for(i=0; i<2; i++){
3718
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
3719
                            0, 0, s->picture_structure != i+1,
3720
                            ref_picture, pix_op,
3721
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
3722

    
3723
                // after put we make avg of the same block
3724
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
3725

    
3726
                //opposite parity is always in the same frame if this is second field
3727
                if(!s->first_field){
3728
                    ref_picture = s->current_picture_ptr->data;
3729
                }
3730
            }
3731
        }
3732
    break;
3733
    default: assert(0);
3734
    }
3735
}
3736

    
3737
/* put block[] to dest[] */
3738
static inline void put_dct(MpegEncContext *s,
3739
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3740
{
3741
    s->dct_unquantize_intra(s, block, i, qscale);
3742
    s->dsp.idct_put (dest, line_size, block);
3743
}
3744

    
3745
/* add block[] to dest[] */
3746
static inline void add_dct(MpegEncContext *s,
3747
                           DCTELEM *block, int i, uint8_t *dest, int line_size)
3748
{
3749
    if (s->block_last_index[i] >= 0) {
3750
        s->dsp.idct_add (dest, line_size, block);
3751
    }
3752
}
3753

    
3754
static inline void add_dequant_dct(MpegEncContext *s,
3755
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3756
{
3757
    if (s->block_last_index[i] >= 0) {
3758
        s->dct_unquantize_inter(s, block, i, qscale);
3759

    
3760
        s->dsp.idct_add (dest, line_size, block);
3761
    }
3762
}
3763

    
3764
/**
3765
 * cleans dc, ac, coded_block for the current non intra MB
3766
 */
3767
void ff_clean_intra_table_entries(MpegEncContext *s)
3768
{
3769
    int wrap = s->b8_stride;
3770
    int xy = s->block_index[0];
3771

    
3772
    s->dc_val[0][xy           ] =
3773
    s->dc_val[0][xy + 1       ] =
3774
    s->dc_val[0][xy     + wrap] =
3775
    s->dc_val[0][xy + 1 + wrap] = 1024;
3776
    /* ac pred */
3777
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3778
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3779
    if (s->msmpeg4_version>=3) {
3780
        s->coded_block[xy           ] =
3781
        s->coded_block[xy + 1       ] =
3782
        s->coded_block[xy     + wrap] =
3783
        s->coded_block[xy + 1 + wrap] = 0;
3784
    }
3785
    /* chroma */
3786
    wrap = s->mb_stride;
3787
    xy = s->mb_x + s->mb_y * wrap;
3788
    s->dc_val[1][xy] =
3789
    s->dc_val[2][xy] = 1024;
3790
    /* ac pred */
3791
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3792
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3793

    
3794
    s->mbintra_table[xy]= 0;
3795
}
3796

    
3797
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Reconstructs the macroblock into s->dest[] (or a scratchpad for
   non-displayed B-frame MBs): performs motion compensation for inter
   MBs, then adds/puts the inverse-transformed residual.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)
 */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    /* hardware acceleration path: XvMC does its own reconstruction */
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients (in IDCT-permutation order) for debugging */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            /* h263-style prediction: reset intra prediction state if the
               co-located MB of the previous pass was intra */
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            /* mpeg-style: reset the three DC predictors to their bias */
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* reconstruct unless encoding a frame whose pixels are never needed
       again (no PSNR requested, intra-only or B-frame) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        /* readable==0: B-frame MB that nobody observes directly; render to
           scratchpad and copy out afterwards */
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride,
           offset the lower blocks by one line instead of block_size lines */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        /* second direction averages on top of the first */
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                        op_qpix= s->dsp.put_qpel_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        /* second direction averages on top of the first */
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->dsp.avg_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            /* codecs whose blocks are not dequantized during parsing need
               dequant+idct+add here; the rest only need idct+add */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                /* WMV2 has its own residual-add path */
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            }else{
                /* mpeg1/2 blocks are already dequantized: plain idct+put */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* copy from the scratchpad into the real frame buffer */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4030

    
4031
/**
 * Reconstruct one macroblock; thin dispatcher that selects the lowres
 * or full-resolution variant of MPV_decode_mb_internal().  The flag is
 * passed as a literal constant in each branch so that the always_inline
 * body can be specialized per value.
 */
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
    if (s->avctx->lowres)
        MPV_decode_mb_internal(s, block, 1);
    else
        MPV_decode_mb_internal(s, block, 0);
}
4035

    
4036
#ifdef CONFIG_ENCODERS
4037

    
4038
/**
 * Zero out block n of s->block[] when it contains only sparse +-1
 * coefficients whose run-length-based significance score is below
 * threshold.  A negative threshold allows the DC coefficient to be
 * eliminated as well (its magnitude is used as the actual threshold).
 */
static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
    static const char run_weight[64]=
        {3,2,2,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0};
    DCTELEM *const block = s->block[n];
    const int last_index = s->block_last_index[n];
    int significance = 0;
    int zero_run = 0;
    int preserve_dc;
    int i;

    if (threshold < 0) {
        /* negative threshold: DC may be eliminated too */
        preserve_dc = 0;
        threshold   = -threshold;
    } else {
        preserve_dc = 1;
    }

    /* are all which we could set to zero are allready zero? */
    if (last_index <= preserve_dc - 1)
        return;

    for (i = 0; i <= last_index; i++) {
        const int j     = s->intra_scantable.permutated[i];
        const int level = ABS(block[j]);

        if (level > 1)
            return;                 /* any large coefficient: keep the block */

        if (level == 1) {
            /* a preserved DC neither scores nor resets the run */
            if (!(preserve_dc && i == 0)) {
                significance += run_weight[zero_run];
                zero_run = 0;
            }
        } else {
            zero_run++;
        }
    }

    if (significance >= threshold)
        return;

    /* insignificant: clear every eliminable coefficient */
    for (i = preserve_dc; i <= last_index; i++) {
        const int j = s->intra_scantable.permutated[i];
        block[j] = 0;
    }

    s->block_last_index[n] = block[0] ? 0 : -1;
}
4086

    
4087
/**
 * Clamp the quantized coefficients of a block into the encoder's legal
 * range [s->min_qcoeff, s->max_qcoeff], counting how many had to be
 * clipped.  The intra DC coefficient is never touched.  In simple
 * MB-decision mode a warning is logged if any clipping occurred.
 */
static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
{
    const int upper = s->max_qcoeff;
    const int lower = s->min_qcoeff;
    int clipped = 0;
    int i = s->mb_intra ? 1 : 0;   /* skip clipping of intra dc */

    for (; i <= last_index; i++) {
        const int j = s->intra_scantable.permutated[i];
        int v = block[j];

        if (v > upper) {
            v = upper;
            clipped++;
        } else if (v < lower) {
            v = lower;
            clipped++;
        }
        block[j] = v;
    }

    if (clipped && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
        av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", clipped, lower, upper);
}
4117

    
4118
#endif //CONFIG_ENCODERS
4119

    
4120
/**
4121
 *
4122
 * @param h is the normal height, this will be reduced automatically if needed for the last row
4123
 */
4124
void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4125
    if (s->avctx->draw_horiz_band) {
4126
        AVFrame *src;
4127
        int offset[4];
4128

    
4129
        if(s->picture_structure != PICT_FRAME){
4130
            h <<= 1;
4131
            y <<= 1;
4132
            if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4133
        }
4134

    
4135
        h= FFMIN(h, s->avctx->height - y);
4136

    
4137
        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4138
            src= (AVFrame*)s->current_picture_ptr;
4139
        else if(s->last_picture_ptr)
4140
            src= (AVFrame*)s->last_picture_ptr;
4141
        else
4142
            return;
4143

    
4144
        if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4145
            offset[0]=
4146
            offset[1]=
4147
            offset[2]=
4148
            offset[3]= 0;
4149
        }else{
4150
            offset[0]= y * s->linesize;;
4151
            offset[1]=
4152
            offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4153
            offset[3]= 0;
4154
        }
4155

    
4156
        emms_c();
4157

    
4158
        s->avctx->draw_horiz_band(s->avctx, src, offset,
4159
                                  y, s->picture_structure, h);
4160
    }
4161
}
4162

    
4163
/**
 * Initialize s->block_index[] and s->dest[] for the macroblock at
 * (s->mb_x, s->mb_y).  Indices/pointers are positioned one MB to the
 * LEFT of the current column (note the "- 2 + …*2" / "mb_x - 1" terms);
 * they are advanced as the row is processed.
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres;  // log2 of the MB size in pixels

    /* four luma 8x8 blocks on the b8 grid (two rows of two) */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma blocks live after the luma area, one entry per MB per plane */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_y_shift));  /* NOTE(review): original uses chroma_x_shift here — kept below */

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        /* absolute row offset; skipped for banded B-frame output where
           dest points into a band buffer instead */
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4187

    
4188
#ifdef CONFIG_ENCODERS
4189

    
4190
/**
 * Fill weight[64] with a per-pixel activity measure for one 8x8 block:
 * for each pixel, 36 * sqrt(count*sum(v^2) - sum(v)^2) / count over its
 * 3x3 neighbourhood, clipped at the block borders.
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int x, y;

//FIXME optimize
    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++) {
            const int y_lo = FFMAX(y - 1, 0), y_hi = FFMIN(8, y + 2);
            const int x_lo = FFMAX(x - 1, 0), x_hi = FFMIN(8, x + 2);
            int sum = 0, sqr = 0, count = 0;
            int x2, y2;

            /* first and second moments of the (clipped) neighbourhood */
            for (y2 = y_lo; y2 < y_hi; y2++) {
                for (x2 = x_lo; x2 < x_hi; x2++) {
                    const int v = ptr[x2 + y2 * stride];
                    sum   += v;
                    sqr   += v * v;
                    count += 1;
                }
            }
            weight[x + 8 * y] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
4212

    
4213
/**
 * Encode one macroblock: optionally adjust qscale (adaptive quant),
 * fetch/motion-compensate the source pixels, forward-DCT and quantize
 * the six blocks, apply coefficient elimination, then entropy-code the
 * MB with the codec-specific bitstream writer.
 *
 * @param motion_x x component of the motion vector to encode
 * @param motion_y y component of the motion vector to encode
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* h263 syntax only allows qscale deltas of +-2 */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* mpeg4 B-frames: dquant must be even, and
                               direct-mode MBs cannot change qscale */
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* 4MV MBs cannot change qscale either */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* MB sticks out of the picture: work on an edge-replicated copy */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            /* decide frame vs field DCT by comparing interlace costs */
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    /* field DCT: lower blocks start one line down, stride doubles */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            /* second direction averages on top of the first */
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            /* same frame/field DCT decision, but on the prediction error */
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* low-variance MB: skip the DCT of blocks whose SAD is tiny */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    if(s->avctx->quantizer_noise_shaping){
        /* keep per-pixel activity weights and the unquantized blocks for
           the later dct_quantize_refine() pass */
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            /* penalize empty blocks for the CBP rate-distortion decision */
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        /* grayscale intra: emit mid-gray chroma DC instead of real chroma */
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4474

    
4475
#endif //CONFIG_ENCODERS
4476

    
4477
/**
 * Flush all buffered decoder state: release every internally or
 * user-owned picture buffer, drop the current/last/next picture
 * references, and reset MB position, parser and bitstream buffer
 * bookkeeping.
 */
void ff_mpeg_flush(AVCodecContext *avctx){
    MpegEncContext *s = avctx->priv_data;
    int i;

    if (!s || !s->picture)
        return;

    /* give back every allocated picture we are allowed to release */
    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
        const int type = s->picture[i].type;

        if (s->picture[i].data[0] &&
            (type == FF_BUFFER_TYPE_INTERNAL || type == FF_BUFFER_TYPE_USER))
            avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
    }
    s->current_picture_ptr =
    s->last_picture_ptr    =
    s->next_picture_ptr    = NULL;

    s->mb_x = 0;
    s->mb_y = 0;

    /* reset the start-code parser */
    s->parse_context.state             = -1;
    s->parse_context.frame_start_found = 0;
    s->parse_context.overread          = 0;
    s->parse_context.overread_index    = 0;
    s->parse_context.index             = 0;
    s->parse_context.last_index        = 0;
    s->bitstream_buffer_size           = 0;
}
4501

    
4502
#ifdef CONFIG_ENCODERS
4503
/**
 * Copy "length" bits from src into the bitstream writer pb.
 *
 * Fast path: when at least 16 source words are pending and the output
 * is byte aligned, align the writer to 32 bits, flush, and memcpy the
 * bulk of the data; otherwise fall back to 16-bit put_bits() calls.
 *
 * Fixes vs the previous version:
 *  - the two identical slow-path branches are merged;
 *  - the trailing partial word is only emitted when bits != 0 —
 *    previously a 16-bit-multiple length read srcw[words] one word past
 *    the supplied data and invoked put_bits() with a zero bit count.
 */
void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
{
    const uint16_t *srcw= (uint16_t*)src;
    int words= length>>4;
    int bits= length&15;
    int i;

    if(length==0) return;

    if(words < 16 || (put_bits_count(pb)&7)){
        /* too little data or unaligned output: plain 16-bit writes */
        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
    }else{
        /* byte-align up to a 32-bit boundary, then bulk-copy */
        for(i=0; put_bits_count(pb)&31; i++)
            put_bits(pb, 8, src[i]);
        flush_put_bits(pb);
        memcpy(pbBufPtr(pb), src+i, 2*words-i);
        skip_put_bytes(pb, 2*words-i);
    }

    /* trailing 1..15 bits, if any */
    if(bits)
        put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
}
4526

    
4527
/**
 * Copy from s into d the encoder state that influences how the next
 * macroblock is coded, before trying out an encoding variant.
 */
static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int n;

    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 */
    d->mb_skip_run= s->mb_skip_run;
    for(n=0; n<3; n++)
        d->last_dc[n]= s->last_dc[n];

    /* statistics */
    d->mv_bits    = s->mv_bits;
    d->i_tex_bits = s->i_tex_bits;
    d->p_tex_bits = s->p_tex_bits;
    d->i_count    = s->i_count;
    d->f_count    = s->f_count;
    d->b_count    = s->b_count;
    d->skip_count = s->skip_count;
    d->misc_bits  = s->misc_bits;
    d->last_bits  = 0;

    d->mb_skipped = 0;
    d->qscale     = s->qscale;
    d->dquant     = s->dquant;
}
4552

    
4553
/**
 * Copy from s into d the state produced by encoding a macroblock, so a
 * winning encoding variant can be kept as the current best.
 */
static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int n;

    memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 */
    d->mb_skip_run= s->mb_skip_run;
    for(n=0; n<3; n++)
        d->last_dc[n]= s->last_dc[n];

    /* statistics */
    d->mv_bits    = s->mv_bits;
    d->i_tex_bits = s->i_tex_bits;
    d->p_tex_bits = s->p_tex_bits;
    d->i_count    = s->i_count;
    d->f_count    = s->f_count;
    d->b_count    = s->b_count;
    d->skip_count = s->skip_count;
    d->misc_bits  = s->misc_bits;

    d->mb_intra   = s->mb_intra;
    d->mb_skipped = s->mb_skipped;
    d->mv_type    = s->mv_type;
    d->mv_dir     = s->mv_dir;
    d->pb         = s->pb;
    if(s->data_partitioning){
        d->pb2    = s->pb2;
        d->tex_pb = s->tex_pb;
    }
    d->block= s->block;
    for(n=0; n<6; n++)
        d->block_last_index[n]= s->block_last_index[n];
    d->interlaced_dct= s->interlaced_dct;
    d->qscale= s->qscale;
}
4589

    
4590
/**
 * Encode one macroblock candidate into a spare bit buffer, score it
 * (bit count, plus weighted SSE distortion in full-RD mode) and, if it
 * beats *dmin, record it in *best and flip *next_block so the next
 * candidate uses the other spare buffer.
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int cost;
    uint8_t *saved_dest[3];

    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    /* reconstruct the second candidate into scratch space so the
       currently best reconstruction is not overwritten */
    if(*next_block){
        memcpy(saved_dest, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    cost= put_bits_count(&s->pb);
    if(s->data_partitioning){
        cost+= put_bits_count(&s->pb2);
        cost+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* full rate-distortion: decode and add lambda-weighted SSE */
        MPV_decode_mb(s, s->block);

        cost *= s->lambda2;
        cost += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, saved_dest, sizeof(s->dest));
    }

    if(cost<*dmin){
        *dmin= cost;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4640

    
4641
/**
 * Sum of squared differences between two w x h pixel blocks.
 * The common 16x16 and 8x8 sizes go through the optimized dsputil
 * routines; other sizes use a scalar loop driven by the square table
 * (offset by 256 so negative differences index correctly).
 */
static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
    uint32_t *sq = squareTbl + 256;
    int sum=0;
    int x,y;

    if(w==16 && h==16)
        return s->dsp.sse[0](NULL, src1, src2, stride, 16);
    if(w==8 && h==8)
        return s->dsp.sse[1](NULL, src1, src2, stride, 8);

    for(y=0; y<h; y++){
        const uint8_t *row1= src1 + y*stride;
        const uint8_t *row2= src2 + y*stride;
        for(x=0; x<w; x++)
            sum+= sq[row1[x] - row2[x]];
    }

    assert(sum>=0);

    return sum;
}
4661

    
4662
static int sse_mb(MpegEncContext *s){
4663
    int w= 16;
4664
    int h= 16;
4665

    
4666
    if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4667
    if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4668

    
4669
    if(w==16 && h==16)
4670
      if(s->avctx->mb_cmp == FF_CMP_NSSE){
4671
        return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4672
               +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4673
               +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4674
      }else{