Statistics
| Branch: | Revision:

ffmpeg / libavcodec / mpegvideo.c @ 68b51e58

History | View | Annotate | Download (241 KB)

1
/*
2
 * The simplest mpeg encoder (well, it was the simplest!)
3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21
 */
22

    
23
/**
24
 * @file mpegvideo.c
25
 * The simplest mpeg encoder (well, it was the simplest!).
26
 */
27

    
28
#include "avcodec.h"
29
#include "dsputil.h"
30
#include "mpegvideo.h"
31
#include "faandct.h"
32
#include <limits.h>
33

    
34
#ifdef USE_FASTMEMCPY
35
#include "fastmemcpy.h"
36
#endif
37

    
38
//#undef NDEBUG
39
//#include <assert.h>
40

    
41
#ifdef CONFIG_ENCODERS
42
static void encode_picture(MpegEncContext *s, int picture_number);
43
#endif //CONFIG_ENCODERS
44
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45
                                   DCTELEM *block, int n, int qscale);
46
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47
                                   DCTELEM *block, int n, int qscale);
48
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49
                                   DCTELEM *block, int n, int qscale);
50
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
51
                                   DCTELEM *block, int n, int qscale);
52
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
53
                                  DCTELEM *block, int n, int qscale);
54
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
55
                                  DCTELEM *block, int n, int qscale);
56
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
57
#ifdef CONFIG_ENCODERS
58
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
59
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
60
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
61
static int sse_mb(MpegEncContext *s);
62
static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
63
#endif //CONFIG_ENCODERS
64

    
65
#ifdef HAVE_XVMC
66
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
67
extern void XVMC_field_end(MpegEncContext *s);
68
extern void XVMC_decode_mb(MpegEncContext *s);
69
#endif
70

    
71
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
72

    
73

    
74
/* enable all paranoid tests for rounding, overflows, etc... */
75
//#define PARANOID
76

    
77
//#define DEBUG
78

    
79

    
80
/* for jpeg fast DCT */
81
#define CONST_BITS 14
82

    
83
/* Post-scale factors for the AAN fast DCT (fdct_ifast / faandct without
 * FAAN_POSTSCALE), in 14-bit fixed point (see CONST_BITS above).  They are
 * folded into the quantizer tables by convert_matrix() so the fast DCT's
 * non-unit output scaling is compensated during quantization. */
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
94

    
95
/* Rounding table indexed by a value in 0..15; maps it to 0, 1 or 2.
 * NOTE(review): presumably used when deriving the chroma motion vector from
 * the luma MVs in H.263-style codecs — the callers are not visible in this
 * chunk, confirm before relying on that. */
static const uint8_t h263_chroma_roundtab[16] = {
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};
99

    
100
/* Default luma-qscale -> chroma-qscale mapping: the identity.  Codecs with a
 * non-linear chroma quantizer install their own table instead (see
 * MPV_common_defaults, which selects this one). */
static const uint8_t ff_default_chroma_qscale_table[32]={
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
};
104

    
105
#ifdef CONFIG_ENCODERS
106
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
107
static uint8_t default_fcode_tab[MAX_MV*2+1];
108

    
109
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
110

    
111
/**
 * Precomputes reciprocal quantization multiplier tables for every qscale in
 * [qmin, qmax] from the given quantization matrix.
 *
 * Fix: corrected the grammar of the overflow warning message
 * ("larger then" -> "larger than"); no logic changes.
 *
 * @param dsp          DSP context; dsp->fdct selects which scaling variant is
 *                     needed and dsp->idct_permutation maps coefficient order
 * @param qmat         output: 32-bit multipliers, one 64-entry row per qscale
 * @param qmat16       output: 16-bit multiplier/bias pairs for the MMX path
 *                     (only filled in the generic-fdct branch)
 * @param quant_matrix input quantization matrix (raster order)
 * @param bias         quantizer rounding bias, QUANT_BIAS_SHIFT fixed point
 * @param qmin, qmax   inclusive qscale range to fill
 * @param intra        1 to skip the DC coefficient in the overflow scan
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            /* slow/postscaled DCT: output already has unit scaling, so the
               multiplier is a plain reciprocal of qscale*matrix */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            /* AAN fast DCT: fold its 14-bit post-scale factors into the
               reciprocal so no separate scaling pass is needed */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            /* generic fdct: also build the 16-bit multiplier + bias tables
               used by the MMX quantizer */
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* keep the 16-bit multiplier in a usable, non-degenerate range */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* determine how many bits of headroom are missing: the largest
           possible coefficient times the multiplier must fit in an int */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger than %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
184

    
185
/* Derives the quantizer (clamped to the user's qmin..qmax) and the squared
 * lagrange multiplier lambda2 from the current rate-distortion lambda. */
static inline void update_qscale(MpegEncContext *s){
    const int unclipped= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);

    s->qscale= clip(unclipped, s->avctx->qmin, s->avctx->qmax);
    s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
}
191
#endif //CONFIG_ENCODERS
192

    
193
/* Initializes a ScanTable from a raster scan order: applies the IDCT
 * coefficient permutation to it and precomputes, for every scan position,
 * the highest permuted index encountered so far (raster_end). */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int i, end;

    st->scantable= src_scantable;

    /* permutated[i] = where coefficient i of the scan lands after the
       IDCT's internal reordering */
    for(i=0; i<64; i++){
        const int idx= src_scantable[i];
        st->permutated[i]= permutation[idx];
#ifdef ARCH_POWERPC
        st->inverse[idx]= i;
#endif
    }

    /* running maximum of the permuted indices */
    end= -1;
    for(i=0; i<64; i++){
        if(st->permutated[i] > end)
            end= st->permutated[i];
        st->raster_end[i]= end;
    }
}
216

    
217
#ifdef CONFIG_ENCODERS
218
/* Writes an optional quantization matrix to the bitstream: a 1-bit presence
 * flag, followed (if the matrix is non-NULL) by its 64 entries in zigzag
 * order, 8 bits each. */
void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
    int i;

    if(!matrix){
        put_bits(pb, 1, 0); /* flag: use the default matrix */
        return;
    }

    put_bits(pb, 1, 1); /* flag: custom matrix follows */
    for(i=0; i<64; i++)
        put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
}
229
#endif //CONFIG_ENCODERS
230

    
231
/**
 * Scans [p, end) for an MPEG start code prefix (00 00 01 xx), resuming a
 * search that may span buffer boundaries.
 * @param p     start of the buffer to scan
 * @param end   one past the last byte
 * @param state in/out: the last (up to) 4 bytes seen, packed big-endian;
 *              carries partial matches across calls
 * @return pointer just past the start code (or `end` if none was found);
 *         on success *state holds the 4 start-code bytes
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    /* first feed up to 3 bytes through *state, in case a start code begun
       in the previous buffer completes here */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* main scan, 3 bytes per step where possible: after each step p points
       one past a candidate window; the distance advanced depends on which
       of the trailing bytes rule out a 00 00 01 match */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3; /* last byte >1: no prefix can end within 2 bytes */
        else if(p[-2]          ) p+= 2; /* middle byte non-zero: skip 2 */
        else if(p[-3]|(p[-1]-1)) p++;   /* not exactly 00 00 01: skip 1 */
        else{
            p++; /* found 00 00 01; step past the xx byte */
            break;
        }
    }

    /* reload *state with the last 4 bytes before the return position */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
256

    
257
/* init common dct for both encoder and decoder */
258
/**
 * Installs the (un)quantization and DCT function pointers shared by encoder
 * and decoder, then lets each architecture override them with optimized
 * versions, and finally builds the permuted scan tables.
 * @return 0 (always succeeds)
 */
int DCT_common_init(MpegEncContext *s)
{
    /* C reference implementations; the per-arch init calls below may
       replace any of these */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* architecture-specific overrides */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* remember the plain quantizer before possibly switching to trellis,
       so the ME/RD code can still use the fast one */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
315

    
316
/* Shallow-copies a Picture and marks the copy as FF_BUFFER_TYPE_COPY so the
 * destination is never treated as the owner of the underlying buffers. */
static void copy_picture(Picture *dst, Picture *src){
    memcpy(dst, src, sizeof(Picture));
    dst->type= FF_BUFFER_TYPE_COPY;
}
320

    
321
/**
 * Copies the per-frame metadata (picture type, quality, numbers, pts,
 * interlacing flags) from src to dst, and — when the encoder's me_threshold
 * is active — also the macroblock types, motion vectors and reference
 * indices that pre-motion-estimation needs.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* the side data below is only required when reusing the caller's
           motion information; warn (but continue) if anything is missing */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        /* copy forward (i=0) and backward (i=1) MV planes; the MV grid is
           16>>motion_subsample_log2 samples per MB in each direction */
        for(i=0; i<2; i++){
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
359

    
360
/**
361
 * allocates a Picture
362
 * The pixels are allocated/set by calling get_buffer() if shared=0
363
 */
364
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* caller owns the pixel buffers; just tag the picture as shared */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        /* let the application allocate the pixel buffers */
        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* all pictures of a context must share identical strides */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* first use of this Picture slot: allocate the per-MB side tables.
       NOTE: CHECKED_ALLOCZ jumps to the fail: label below on OOM. */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        /* H.264 stores MVs on a 4x4 grid, the other codecs on an 8x8 grid */
        if(s->out_format == FMT_H264){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
444

    
445
/**
446
 * deallocates a picture
447
 */
448
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* return application-owned pixel buffers; shared pictures are merely
       detached below, never released here */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    /* free all side tables allocated by alloc_picture() (av_freep also
       NULLs the pointers, so double-free is safe) */
    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL;
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* for shared pictures only the references are dropped */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
477

    
478
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * context: edge emulation buffer, ME/RD scratchpads, ME maps, noise
 * reduction accumulator and the DCT block array.
 * @param base  currently unused (scratch sizes come from s itself)
 * @return 0 on success, -1 on allocation failure (partial allocations are
 *         left for MPV_common_end() to release)
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* the three scratchpads intentionally alias the same allocation */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks lets code address the 12 blocks through independent pointers */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
507

    
508
/* Releases everything init_duplicate_context() allocated and clears the
 * pointers that aliased those allocations.  Safe to call with NULL. */
static void free_duplicate_context(MpegEncContext *s){
    if(!s)
        return;

    av_freep(&s->allocated_edge_emu_buffer);
    s->edge_emu_buffer= NULL;

    /* the scratchpads all alias me.scratchpad, so free once, clear all */
    av_freep(&s->me.scratchpad);
    s->rd_scratchpad= NULL;
    s->b_scratchpad= NULL;
    s->obmc_scratchpad= NULL;

    av_freep(&s->dct_error_sum);
    av_freep(&s->me.map);
    av_freep(&s->me.score_map);

    av_freep(&s->blocks);
    s->block= NULL;
}
523

    
524
/**
 * Saves the thread-local fields of src into bak.  Used by
 * ff_update_duplicate_context() to preserve a thread context's private
 * buffers and state across a wholesale memcpy of the shared context.
 * NOTE: this list must stay in sync with init_duplicate_context().
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
545

    
546
/**
 * Refreshes a thread's context from the master: copies src over dst
 * wholesale, then restores dst's thread-local buffers/state (saved first via
 * backup_duplicate_context) and re-derives the pblocks pointers, which must
 * point into dst's own block array rather than src's.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
559

    
560
/**
 * Propagates the fields that motion estimation and header encoding may have
 * changed from the master context back into a thread context, so slice
 * threads encode with consistent picture-level state.
 */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
576

    
577
/**
578
 * sets the given MpegEncContext to common defaults (same for encoding and decoding).
579
 * the changed fields will not depend upon the prior state of the MpegEncContext.
580
 */
581
/* Resets the fields shared by encoder and decoder to their defaults:
 * MPEG-1 DC scale tables, identity chroma-qscale mapping, progressive
 * full-frame coding, zeroed picture counters and minimal MV f/b codes. */
static void MPV_common_defaults(MpegEncContext *s){
    s->y_dc_scale_table    = ff_mpeg1_dc_scale_table;
    s->c_dc_scale_table    = ff_mpeg1_dc_scale_table;
    s->chroma_qscale_table = ff_default_chroma_qscale_table;

    /* assume progressive frame-picture coding until a header says otherwise */
    s->progressive_frame   = 1;
    s->progressive_sequence= 1;
    s->picture_structure   = PICT_FRAME;

    /* restart all picture counters */
    s->coded_picture_number = 0;
    s->picture_number       = 0;
    s->input_picture_number = 0;
    s->picture_in_gop_number= 0;

    /* smallest legal MV range codes */
    s->f_code = 1;
    s->b_code = 1;
}
598

    
599
/**
600
 * sets the given MpegEncContext to defaults for decoding.
601
 * the changed fields will not depend upon the prior state of the MpegEncContext.
602
 */
603
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs nothing beyond the common defaults */
    MPV_common_defaults(s);
}
606

    
607
/**
608
 * sets the given MpegEncContext to defaults for encoding.
609
 * the changed fields will not depend upon the prior state of the MpegEncContext.
610
 */
611

    
612
#ifdef CONFIG_ENCODERS
613
static void MPV_encode_defaults(MpegEncContext *s){
    /* one-time init guard for the shared static tables below.
       NOTE(review): not thread-safe if two encoders init concurrently —
       confirm callers serialize codec init. */
    static int done=0;

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        /* NOTE(review): the av_mallocz result is not checked; on OOM
           s->me.mv_penalty ends up NULL — confirm this is acceptable here. */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 for small motion vectors (-16..15), 0 elsewhere */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
632
#endif //CONFIG_ENCODERS
633

    
634
/**
635
 * init common structure for both encoder and decoder.
636
 * this assumes that some variables like width/height are already set
637
 */
638
/**
 * Initializes the common (encoder + decoder) state of an MpegEncContext.
 * Assumes width/height and the relevant codec fields are already set.
 * Computes macroblock geometry, allocates all shared tables and the
 * per-thread duplicate contexts.
 * @return 0 on success, -1 on error (everything allocated so far is freed
 *         via MPV_common_end)
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    s->mb_height = (s->height + 15) / 16;

    /* one slice thread needs at least one MB row */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock geometry; strides include one extra column for edge MBs */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* per-block-plane wrap widths: 8x8 grid for luma, MB grid for chroma */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* mb index (0..mb_num) -> position in the padded mb_stride layout.
       NOTE: CHECKED_ALLOCZ jumps to fail: on OOM. */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* the usable table pointers skip the one-MB padding border */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantizer multiplier tables, 32 qscales x 64 coefficients */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    /* extra planes for drawing MV/QP/MB-type debug visualizations */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 reuses the master context; the rest get full copies.
       NOTE(review): av_malloc result is not checked before memcpy — OOM
       here would crash; confirm whether that matches project policy. */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    /* give every thread its own scratch buffers and MB-row slice */
    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
826

    
827
/* init common structure for both encoder and decoder */
/**
 * Frees everything allocated by MPV_common_init() and resets the context
 * so it can be reinitialized.  Also invoked from MPV_common_init()'s own
 * failure path, so it must tolerate a partially initialized context
 * (every release goes through av_freep()/NULL-safe helpers).
 */
void MPV_common_end(MpegEncContext *s)
{
    int i, j, k;

    /* tear down per-thread state; thread_context[0] is s itself, so only
       indices >= 1 own separately allocated contexts and are freed */
    for(i=0; i<s->avctx->thread_count; i++){
        free_duplicate_context(s->thread_context[i]);
    }
    for(i=1; i<s->avctx->thread_count; i++){
        av_freep(&s->thread_context[i]);
    }

    av_freep(&s->parse_context.buffer);
    s->parse_context.buffer_size=0;

    /* motion vector tables: the *_base pointers own the memory and are
       freed; the plain pointers are only cleared (they point into the
       base allocations — set up outside this function) */
    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table_base);
    av_freep(&s->b_forw_mv_table_base);
    av_freep(&s->b_back_mv_table_base);
    av_freep(&s->b_bidir_forw_mv_table_base);
    av_freep(&s->b_bidir_back_mv_table_base);
    av_freep(&s->b_direct_mv_table_base);
    s->p_mv_table= NULL;
    s->b_forw_mv_table= NULL;
    s->b_back_mv_table= NULL;
    s->b_bidir_forw_mv_table= NULL;
    s->b_bidir_back_mv_table= NULL;
    s->b_direct_mv_table= NULL;
    for(i=0; i<2; i++){
        for(j=0; j<2; j++){
            for(k=0; k<2; k++){
                av_freep(&s->b_field_mv_table_base[i][j][k]);
                s->b_field_mv_table[i][j][k]=NULL;
            }
            av_freep(&s->b_field_select_table[i][j]);
            av_freep(&s->p_field_mv_table_base[i][j]);
            s->p_field_mv_table[i][j]=NULL;
        }
        av_freep(&s->p_field_select_table[i]);
    }

    /* per-block prediction state (DC/AC prediction, coded-block flags) */
    av_freep(&s->dc_val_base);
    av_freep(&s->ac_val_base);
    av_freep(&s->coded_block_base);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);

    av_freep(&s->mbskip_table);
    av_freep(&s->prev_pict_types);
    av_freep(&s->bitstream_buffer);
    s->allocated_bitstream_buffer_size=0;

    /* encoder-side statistics and quantization tables */
    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);
    av_freep(&s->mb_index2xy);
    av_freep(&s->lambda_table);
    av_freep(&s->q_intra_matrix);
    av_freep(&s->q_inter_matrix);
    av_freep(&s->q_intra_matrix16);
    av_freep(&s->q_inter_matrix16);
    av_freep(&s->input_picture);
    av_freep(&s->reordered_input_picture);
    av_freep(&s->dct_offset);

    /* release every picture slot before freeing the array itself */
    if(s->picture){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            free_picture(s, &s->picture[i]);
        }
    }
    av_freep(&s->picture);
    s->context_initialized = 0;
    /* clear dangling picture pointers so a reinit starts from scratch */
    s->last_picture_ptr=
    s->next_picture_ptr=
    s->current_picture_ptr= NULL;
    s->linesize= s->uvlinesize= 0;

    for(i=0; i<3; i++)
        av_freep(&s->visualization_buffer[i]);

    avcodec_default_free_buffers(s->avctx);
}
910

    
911
#ifdef CONFIG_ENCODERS
912

    
913
/* init video encoder */
914
int MPV_encode_init(AVCodecContext *avctx)
915
{
916
    MpegEncContext *s = avctx->priv_data;
917
    int i;
918
    int chroma_h_shift, chroma_v_shift;
919

    
920
    MPV_encode_defaults(s);
921

    
922
    if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
923
        av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
924
        return -1;
925
    }
926

    
927
    if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
928
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
929
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
930
            return -1;
931
        }
932
    }else{
933
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
934
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
935
            return -1;
936
        }
937
    }
938

    
939
    s->bit_rate = avctx->bit_rate;
940
    s->width = avctx->width;
941
    s->height = avctx->height;
942
    if(avctx->gop_size > 600){
943
        av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
944
        avctx->gop_size=600;
945
    }
946
    s->gop_size = avctx->gop_size;
947
    s->avctx = avctx;
948
    s->flags= avctx->flags;
949
    s->flags2= avctx->flags2;
950
    s->max_b_frames= avctx->max_b_frames;
951
    s->codec_id= avctx->codec->id;
952
    s->luma_elim_threshold  = avctx->luma_elim_threshold;
953
    s->chroma_elim_threshold= avctx->chroma_elim_threshold;
954
    s->strict_std_compliance= avctx->strict_std_compliance;
955
    s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
956
    s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
957
    s->mpeg_quant= avctx->mpeg_quant;
958
    s->rtp_mode= !!avctx->rtp_payload_size;
959
    s->intra_dc_precision= avctx->intra_dc_precision;
960
    s->user_specified_pts = AV_NOPTS_VALUE;
961

    
962
    if (s->gop_size <= 1) {
963
        s->intra_only = 1;
964
        s->gop_size = 12;
965
    } else {
966
        s->intra_only = 0;
967
    }
968

    
969
    s->me_method = avctx->me_method;
970

    
971
    /* Fixed QSCALE */
972
    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
973

    
974
    s->adaptive_quant= (   s->avctx->lumi_masking
975
                        || s->avctx->dark_masking
976
                        || s->avctx->temporal_cplx_masking
977
                        || s->avctx->spatial_cplx_masking
978
                        || s->avctx->p_masking
979
                        || s->avctx->border_masking
980
                        || (s->flags&CODEC_FLAG_QP_RD))
981
                       && !s->fixed_qscale;
982

    
983
    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
984
    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
985
    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
986

    
987
    if(avctx->rc_max_rate && !avctx->rc_buffer_size){
988
        av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
989
        return -1;
990
    }
991

    
992
    if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
993
        av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
994
    }
995

    
996
    if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
997
        av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
998
        return -1;
999
    }
1000

    
1001
    if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1002
        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1003
        return -1;
1004
    }
1005

    
1006
    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1007
       && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1008
       && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1009

    
1010
        av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1011
    }
1012

    
1013
    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1014
       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1015
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1016
        return -1;
1017
    }
1018

    
1019
    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1020
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1021
        return -1;
1022
    }
1023

    
1024
    if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1025
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1026
        return -1;
1027
    }
1028

    
1029
    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1030
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1031
        return -1;
1032
    }
1033

    
1034
    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1035
        av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1036
        return -1;
1037
    }
1038

    
1039
    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1040
        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1041
        return -1;
1042
    }
1043

    
1044
    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1045
       && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1046
        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1047
        return -1;
1048
    }
1049

    
1050
    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1051
        av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1052
        return -1;
1053
    }
1054

    
1055
    if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1056
        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1057
        return -1;
1058
    }
1059

    
1060
    if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1061
        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1062
        return -1;
1063
    }
1064

    
1065
    if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1066
        av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1067
        return -1;
1068
    }
1069

    
1070
    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1071
       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1072
       && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1073
        av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1074
        return -1;
1075
    }
1076

    
1077
    if(s->avctx->thread_count > 1)
1078
        s->rtp_mode= 1;
1079

    
1080
    if(!avctx->time_base.den || !avctx->time_base.num){
1081
        av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1082
        return -1;
1083
    }
1084

    
1085
    i= (INT_MAX/2+128)>>8;
1086
    if(avctx->me_threshold >= i){
1087
        av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1088
        return -1;
1089
    }
1090
    if(avctx->mb_threshold >= i){
1091
        av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1092
        return -1;
1093
    }
1094

    
1095
    if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1096
        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
1097
        return -1;
1098
    }
1099

    
1100
    i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1101
    if(i > 1){
1102
        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1103
        avctx->time_base.den /= i;
1104
        avctx->time_base.num /= i;
1105
//        return -1;
1106
    }
1107

    
1108
    if(s->codec_id==CODEC_ID_MJPEG){
1109
        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1110
        s->inter_quant_bias= 0;
1111
    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1112
        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1113
        s->inter_quant_bias= 0;
1114
    }else{
1115
        s->intra_quant_bias=0;
1116
        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1117
    }
1118

    
1119
    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1120
        s->intra_quant_bias= avctx->intra_quant_bias;
1121
    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1122
        s->inter_quant_bias= avctx->inter_quant_bias;
1123

    
1124
    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1125

    
1126
    if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1127
        av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1128
        return -1;
1129
    }
1130
    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1131

    
1132
    switch(avctx->codec->id) {
1133
    case CODEC_ID_MPEG1VIDEO:
1134
        s->out_format = FMT_MPEG1;
1135
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1136
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1137
        break;
1138
    case CODEC_ID_MPEG2VIDEO:
1139
        s->out_format = FMT_MPEG1;
1140
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1141
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1142
        s->rtp_mode= 1;
1143
        break;
1144
    case CODEC_ID_LJPEG:
1145
    case CODEC_ID_JPEGLS:
1146
    case CODEC_ID_MJPEG:
1147
        s->out_format = FMT_MJPEG;
1148
        s->intra_only = 1; /* force intra only for jpeg */
1149
        s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1150
        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1151
        s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1152
        s->mjpeg_vsample[1] = 1;
1153
        s->mjpeg_vsample[2] = 1;
1154
        s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1155
        s->mjpeg_hsample[1] = 1;
1156
        s->mjpeg_hsample[2] = 1;
1157
        if (mjpeg_init(s) < 0)
1158
            return -1;
1159
        avctx->delay=0;
1160
        s->low_delay=1;
1161
        break;
1162
    case CODEC_ID_H261:
1163
        s->out_format = FMT_H261;
1164
        avctx->delay=0;
1165
        s->low_delay=1;
1166
        break;
1167
    case CODEC_ID_H263:
1168
        if (h263_get_picture_format(s->width, s->height) == 7) {
1169
            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1170
            return -1;
1171
        }
1172
        s->out_format = FMT_H263;
1173
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1174
        avctx->delay=0;
1175
        s->low_delay=1;
1176
        break;
1177
    case CODEC_ID_H263P:
1178
        s->out_format = FMT_H263;
1179
        s->h263_plus = 1;
1180
        /* Fx */
1181
        s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1182
        s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1183
        s->modified_quant= s->h263_aic;
1184
        s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1185
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1186
        s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1187
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1188
        s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1189

    
1190
        /* /Fx */
1191
        /* These are just to be sure */
1192
        avctx->delay=0;
1193
        s->low_delay=1;
1194
        break;
1195
    case CODEC_ID_FLV1:
1196
        s->out_format = FMT_H263;
1197
        s->h263_flv = 2; /* format = 1; 11-bit codes */
1198
        s->unrestricted_mv = 1;
1199
        s->rtp_mode=0; /* don't allow GOB */
1200
        avctx->delay=0;
1201
        s->low_delay=1;
1202
        break;
1203
    case CODEC_ID_RV10:
1204
        s->out_format = FMT_H263;
1205
        avctx->delay=0;
1206
        s->low_delay=1;
1207
        break;
1208
    case CODEC_ID_RV20:
1209
        s->out_format = FMT_H263;
1210
        avctx->delay=0;
1211
        s->low_delay=1;
1212
        s->modified_quant=1;
1213
        s->h263_aic=1;
1214
        s->h263_plus=1;
1215
        s->loop_filter=1;
1216
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1217
        break;
1218
    case CODEC_ID_MPEG4:
1219
        s->out_format = FMT_H263;
1220
        s->h263_pred = 1;
1221
        s->unrestricted_mv = 1;
1222
        s->low_delay= s->max_b_frames ? 0 : 1;
1223
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1224
        break;
1225
    case CODEC_ID_MSMPEG4V1:
1226
        s->out_format = FMT_H263;
1227
        s->h263_msmpeg4 = 1;
1228
        s->h263_pred = 1;
1229
        s->unrestricted_mv = 1;
1230
        s->msmpeg4_version= 1;
1231
        avctx->delay=0;
1232
        s->low_delay=1;
1233
        break;
1234
    case CODEC_ID_MSMPEG4V2:
1235
        s->out_format = FMT_H263;
1236
        s->h263_msmpeg4 = 1;
1237
        s->h263_pred = 1;
1238
        s->unrestricted_mv = 1;
1239
        s->msmpeg4_version= 2;
1240
        avctx->delay=0;
1241
        s->low_delay=1;
1242
        break;
1243
    case CODEC_ID_MSMPEG4V3:
1244
        s->out_format = FMT_H263;
1245
        s->h263_msmpeg4 = 1;
1246
        s->h263_pred = 1;
1247
        s->unrestricted_mv = 1;
1248
        s->msmpeg4_version= 3;
1249
        s->flipflop_rounding=1;
1250
        avctx->delay=0;
1251
        s->low_delay=1;
1252
        break;
1253
    case CODEC_ID_WMV1:
1254
        s->out_format = FMT_H263;
1255
        s->h263_msmpeg4 = 1;
1256
        s->h263_pred = 1;
1257
        s->unrestricted_mv = 1;
1258
        s->msmpeg4_version= 4;
1259
        s->flipflop_rounding=1;
1260
        avctx->delay=0;
1261
        s->low_delay=1;
1262
        break;
1263
    case CODEC_ID_WMV2:
1264
        s->out_format = FMT_H263;
1265
        s->h263_msmpeg4 = 1;
1266
        s->h263_pred = 1;
1267
        s->unrestricted_mv = 1;
1268
        s->msmpeg4_version= 5;
1269
        s->flipflop_rounding=1;
1270
        avctx->delay=0;
1271
        s->low_delay=1;
1272
        break;
1273
    default:
1274
        return -1;
1275
    }
1276

    
1277
    avctx->has_b_frames= !s->low_delay;
1278

    
1279
    s->encoding = 1;
1280

    
1281
    /* init */
1282
    if (MPV_common_init(s) < 0)
1283
        return -1;
1284

    
1285
    if(s->modified_quant)
1286
        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1287
    s->progressive_frame=
1288
    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1289
    s->quant_precision=5;
1290

    
1291
    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1292
    ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1293

    
1294
#ifdef CONFIG_H261_ENCODER
1295
    if (s->out_format == FMT_H261)
1296
        ff_h261_encode_init(s);
1297
#endif
1298
    if (s->out_format == FMT_H263)
1299
        h263_encode_init(s);
1300
    if(s->msmpeg4_version)
1301
        ff_msmpeg4_encode_init(s);
1302
    if (s->out_format == FMT_MPEG1)
1303
        ff_mpeg1_encode_init(s);
1304

    
1305
    /* init q matrix */
1306
    for(i=0;i<64;i++) {
1307
        int j= s->dsp.idct_permutation[i];
1308
        if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1309
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1310
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1311
        }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1312
            s->intra_matrix[j] =
1313
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1314
        }else
1315
        { /* mpeg1/2 */
1316
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1317
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1318
        }
1319
        if(s->avctx->intra_matrix)
1320
            s->intra_matrix[j] = s->avctx->intra_matrix[i];
1321
        if(s->avctx->inter_matrix)
1322
            s->inter_matrix[j] = s->avctx->inter_matrix[i];
1323
    }
1324

    
1325
    /* precompute matrix */
1326
    /* for mjpeg, we do include qscale in the matrix */
1327
    if (s->out_format != FMT_MJPEG) {
1328
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1329
                       s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1330
        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1331
                       s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1332
    }
1333

    
1334
    if(ff_rate_control_init(s) < 0)
1335
        return -1;
1336

    
1337
    return 0;
1338
}
1339

    
1340
/**
 * Shuts the encoder down: releases rate control state, the common
 * MpegEncContext resources, mjpeg specific data and the extradata buffer.
 * @return always 0
 */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    /* release the rate control state */
    ff_rate_control_uninit(s);

    /* free everything MPV_common_init() allocated */
    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}
1358

    
1359
#endif //CONFIG_ENCODERS
1360

    
1361
/**
 * Builds the derived lookup tables of an RLTable: for each of the two
 * halves of the code table ("not last" then "last" entries) it fills
 * max_level[run], max_run[level] and index_run[run] and stores heap
 * (or static-pool) copies of them in the RLTable.
 */
void init_rl(RLTable *rl, int use_static)
{
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
    uint8_t index_run[MAX_RUN+1];
    int last, run, level, start, end, i;

    /* static tables are shared and filled only once */
    if (use_static && rl->max_level[0])
        return;

    /* compute max_level[], max_run[] and index_run[] */
    for (last = 0; last < 2; last++) {
        /* entries [0, rl->last) are the "not last" codes,
           entries [rl->last, rl->n) the "last" codes */
        if (last == 0) {
            start = 0;
            end   = rl->last;
        } else {
            start = rl->last;
            end   = rl->n;
        }

        memset(max_level, 0, MAX_RUN + 1);
        memset(max_run,   0, MAX_LEVEL + 1);
        memset(index_run, rl->n, MAX_RUN + 1);   /* rl->n marks "no entry yet" */

        for (i = start; i < end; i++) {
            run   = rl->table_run[i];
            level = rl->table_level[i];
            if (index_run[run] == rl->n)         /* remember first index per run */
                index_run[run] = i;
            if (level > max_level[run])
                max_level[run] = level;
            if (run > max_run[level])
                max_run[level] = run;
        }

        rl->max_level[last] = use_static ? av_mallocz_static(MAX_RUN + 1)
                                         : av_malloc(MAX_RUN + 1);
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);

        rl->max_run[last] = use_static ? av_mallocz_static(MAX_LEVEL + 1)
                                       : av_malloc(MAX_LEVEL + 1);
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);

        rl->index_run[last] = use_static ? av_mallocz_static(MAX_RUN + 1)
                                         : av_malloc(MAX_RUN + 1);
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
    }
}
1411

    
1412
/* draw the edges of width 'w' of an image of size width, height:
   replicate the outermost rows/columns/corners into the surrounding
   edge band so reads outside the picture see the border pixel values */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *row, *bottom;
    int i;

    bottom = buf + (height - 1) * wrap;

    /* replicate the first row upward and the last row downward */
    for (i = 0; i < w; i++) {
        memcpy(buf    - (i + 1) * wrap, buf,    width);
        memcpy(bottom + (i + 1) * wrap, bottom, width);
    }

    /* replicate the first and last column of every row sideways */
    row = buf;
    for (i = 0; i < height; i++) {
        memset(row - w,     row[0],         w);
        memset(row + width, row[width - 1], w);
        row += wrap;
    }

    /* fill the four w x w corner squares with the corner pixels */
    for (i = 0; i < w; i++) {
        memset(buf    - (i + 1) * wrap - w,     buf[0],            w); /* top left */
        memset(buf    - (i + 1) * wrap + width, buf[width - 1],    w); /* top right */
        memset(bottom + (i + 1) * wrap - w,     bottom[0],         w); /* bottom left */
        memset(bottom + (i + 1) * wrap + width, bottom[width - 1], w); /* bottom right */
    }
}
1440

    
1441
/**
 * Returns an index into s->picture[] whose slot has no allocated frame data.
 * For shared pictures only slots with type==0 qualify; otherwise slots with
 * a nonzero type are preferred before falling back to any empty slot.
 * A free slot must exist; running out is a program error (assert).
 */
int ff_find_unused_picture(MpegEncContext *s, int shared){
    int i;

    if (shared) {
        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
            if (s->picture[i].data[0] == NULL && s->picture[i].type == 0)
                return i;
        }
    } else {
        /* first pass: empty slots that already carry a buffer type */
        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
            if (s->picture[i].data[0] == NULL && s->picture[i].type != 0)
                return i; //FIXME
        }
        /* second pass: any empty slot */
        for (i = 0; i < MAX_PICTURE_COUNT; i++) {
            if (s->picture[i].data[0] == NULL)
                return i;
        }
    }

    assert(0);
    return -1;
}
1460

    
1461
/**
 * Recomputes the noise reduction DCT offsets from the accumulated
 * per-coefficient error sums, separately for each of the two statistic
 * sets (indexed by the intra flag).
 */
static void update_noise_reduction(MpegEncContext *s){
    int intra, coef;

    for (intra = 0; intra < 2; intra++) {
        /* keep the running statistics bounded: once enough blocks were
           counted, halve both the counter and the error sums */
        if (s->dct_count[intra] > (1 << 16)) {
            for (coef = 0; coef < 64; coef++)
                s->dct_error_sum[intra][coef] >>= 1;
            s->dct_count[intra] >>= 1;
        }

        /* offset = noise_reduction * count / error_sum, rounded */
        for (coef = 0; coef < 64; coef++) {
            s->dct_offset[intra][coef] =
                (s->avctx->noise_reduction * s->dct_count[intra]
                 + s->dct_error_sum[intra][coef] / 2)
                / (s->dct_error_sum[intra][coef] + 1);
        }
    }
}
1477

    
1478
/**
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
 *
 * Selects/allocates the current picture, rotates the last/next reference
 * picture pointers and picks the dequantizer functions for this frame.
 * @return 0 on success, -1 if allocating the current picture failed
 */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;
    s->mb_skipped = 0;

    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                /* a referenced picture with data that is not the next
                   reference should no longer exist here */
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }
alloc:
    if(!s->encoding){
        /* release non reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }

        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
        else{
            i= ff_find_unused_picture(s, 0);
            pic= (AVFrame*)&s->picture[i];
        }

        /* B frames (except in H264) are never used as references */
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
                        && !s->dropable ? 3 : 0;

        pic->coded_picture_number= s->coded_picture_number++;

        if( alloc_picture(s, (Picture*)pic, 0) < 0)
            return -1;

        s->current_picture_ptr= (Picture*)pic;
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
    }

    s->current_picture_ptr->pict_type= s->pict_type;
//    if(s->flags && CODEC_FLAG_QSCALE)
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;

    copy_picture(&s->current_picture, s->current_picture_ptr);

  /* everything below rotates the reference pictures; H264 (except SVQ3)
     manages its references itself */
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
    if (s->pict_type != B_TYPE) {
        s->last_picture_ptr= s->next_picture_ptr;
        if(!s->dropable)
            s->next_picture_ptr= s->current_picture_ptr;
    }
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
        s->pict_type, s->dropable);*/

    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);

    /* a non-I frame without a usable reference: complain and retry the
       allocation path so decoding can continue */
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
        goto alloc;
    }

    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));

    /* field pictures: address only every second line of the frame buffers */
    if(s->picture_structure!=PICT_FRAME){
        int i;
        for(i=0; i<4; i++){
            if(s->picture_structure == PICT_BOTTOM_FIELD){
                 s->current_picture.data[i] += s->current_picture.linesize[i];
            }
            s->current_picture.linesize[i] *= 2;
            s->last_picture.linesize[i] *=2;
            s->next_picture.linesize[i] *=2;
        }
    }
  }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
    }else{
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
    }

    /* dct_error_sum is only allocated when noise reduction is enabled */
    if(s->dct_error_sum){
        assert(s->avctx->noise_reduction && s->encoding);

        update_noise_reduction(s);
    }

#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration)
        return XVMC_field_start(s, avctx);
#endif
    return 0;
}
1604

    
1605
/* generic function for encode/decode called after a frame has been coded/decoded */
/**
 * Finishes the current frame: draws the edge band around reference
 * pictures, updates last-picture bookkeeping and (when encoding)
 * releases non-reference frames.
 */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            /* luma gets the full edge width, chroma half (4:2:0) */
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember per-type state for the next frame */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1655

    
1656
/**
 * Draws a line between (sx, sy) and (ex, ey) using 16.16 fixed-point
 * interpolation along the minor axis (colors are added, not stored,
 * so overlapping lines accumulate).
 * @param buf plane to draw into
 * @param sx x of one endpoint
 * @param sy y of one endpoint
 * @param ex x of the other endpoint
 * @param ey y of the other endpoint
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the line
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: step along x, interpolate y between two rows.
           ex-sx is non-zero here, so the division below is safe. */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex;
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            //NOTE(review): the y+1 write can touch one row past the clipped
            //endpoint when the line ends on the last row — confirm callers
            //always provide an edge margin
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical (or single point): step along y, interpolate x */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0;
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1704

    
1705
/**
 * Draws an arrow from (ex, ey) -> (sx, sy): the shaft plus, when the
 * arrow is long enough, two short head strokes at (sx, sy).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int vx, vy;

    /* allow endpoints somewhat outside the image; draw_line clips exactly */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    vx= ex - sx;
    vy= ey - sy;

    /* only draw the head if the arrow is longer than 3 pixels */
    if(vx*vx + vy*vy > 3*3){
        /* head strokes follow the +-45 degree rotations of the direction vector */
        int rx=  vx + vy;
        int ry= -vx + vy;
        int length= ff_sqrt((rx*rx + ry*ry)<<8);

        //FIXME subpixel accuracy
        rx= ROUNDED_DIV(rx*3<<4, length);
        ry= ROUNDED_DIV(ry*3<<4, length);

        draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
        draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1737

    
1738
/**
 * Prints debugging info for the given picture: a textual per-macroblock
 * dump (skip counts, QP, MB type) and/or an in-picture visualization of
 * motion vectors, QP and MB types, depending on avctx->debug / debug_mv.
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* textual per-macroblock dump */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    /* consecutive-skip count, clamped to one digit */
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* in-picture visualization: draw into a private copy of the frame */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + 1;
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* copy planes into the visualization buffer so the decoder's
           reference frames are not modified */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  /* type 0: P forward, 1: B forward, 2: B backward */
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    /* paint both chroma planes of the MB with a gray level
                       proportional to its qscale */
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    /* pick a chroma color per MB type (hue angle, saturation) */
                    u=v=128;
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
1998

    
1999
#ifdef CONFIG_ENCODERS
2000

    
2001
/**
 * Returns the sum of absolute errors of a 16x16 block against a
 * constant reference value.
 * @param src top-left corner of the 16x16 block
 * @param ref constant value each pixel is compared against
 * @param stride linesize of src
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int x, y;
    int sum= 0;

    for(y=0; y<16; y++){
        for(x=0; x<16; x++){
            const int diff= src[x + y*stride] - ref;
            sum += diff < 0 ? -diff : diff;
        }
    }

    return sum;
}
2013

    
2014
/**
 * Counts the 16x16 macroblocks for which intra coding looks cheaper
 * than inter coding against the reference frame (used by B-frame
 * strategy 1 as a scene-change heuristic).
 * @param src current frame data
 * @param ref previous frame data
 * @param stride linesize of both frames
 */
static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
    int x, y;
    int count= 0;
    const int w= s->width  & ~15;   // clip to whole macroblocks
    const int h= s->height & ~15;

    for(y=0; y<h; y+=16){
        for(x=0; x<w; x+=16){
            const int offset= x + y*stride;
            const int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
            const int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
            const int sae = get_sae(src + offset, mean, stride);

            if(sae + 500 < sad)
                count++;
        }
    }
    return count;
}
2033

    
2034

    
2035
/**
 * Queues an input frame for encoding: validates/guesses its pts, then
 * either references the caller's buffers directly or copies the frame
 * into an internal picture, and appends it to s->input_picture.
 * @param pic_arg the frame to queue, or NULL to flush (shifts the queue)
 * @return 0 on success, -1 on a non-monotone timestamp
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;   /* can we reference pic_arg's buffers without copying? */

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        if(pts != AV_NOPTS_VALUE){
            /* timestamps must be strictly monotone */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            /* no pts given: extrapolate from the last one, or fall back to
               the display picture number */
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct use requires the caller to preserve the input and matching strides */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* just alias the caller's plane pointers */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            /* copy each plane, row by row if strides differ */
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2137

    
2138
/**
 * Decides whether frame p is similar enough to ref to be skipped by
 * the encoder, comparing all 8x8 blocks of all three planes and
 * accumulating the comparator output according to frame_skip_exp.
 * @return 1 if the frame should be skipped, 0 otherwise
 */
static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
    int x, y, plane;
    int acc=0;
    int64_t acc64=0;

    for(plane=0; plane<3; plane++){
        const int stride= p->linesize[plane];
        const int bw= plane ? 1 : 2;   // luma has 2x2 blocks per MB, chroma 1x1
        for(y=0; y<s->mb_height*bw; y++){
            for(x=0; x<s->mb_width*bw; x++){
                const int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
                const int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);

                switch(s->avctx->frame_skip_exp){
                    case 0: acc  = FFMAX(acc, v);        break;
                    case 1: acc += ABS(v);               break;
                    case 2: acc += v*v;                  break;
                    case 3: acc64+= ABS(v*v*(int64_t)v); break;
                    case 4: acc64+= v*v*(int64_t)(v*v);  break;
                }
            }
        }
    }

    if(acc) acc64= acc;

    if(acc64 < s->avctx->frame_skip_threshold)
        return 1;
    if(acc64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
        return 1;
    return 0;
}
2170

    
2171
/**
 * Estimates the best number of consecutive B frames by encoding the
 * queued input pictures at reduced resolution (brd_scale) with every
 * candidate B count and picking the one with the lowest RD cost.
 * @return the best B-frame count, or -1 on failure
 */
static int estimate_best_b_count(MpegEncContext *s){
    AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
    AVCodecContext *c= avcodec_alloc_context();
    AVFrame input[FF_MAX_B_FRAMES+2];
    const int scale= s->avctx->brd_scale;
    int i, j, out_size, p_lambda, b_lambda, lambda2;
    int outbuf_size= s->width * s->height; //FIXME
    uint8_t *outbuf= av_malloc(outbuf_size);
    ImgReSampleContext *resample;
    int64_t best_rd= INT64_MAX;
    int best_b_count= -1;

//    emms_c();
    p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
    b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
    if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
    lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;

    /* set up a downscaled encoder context mirroring the relevant settings */
    c->width = s->width >> scale;
    c->height= s->height>> scale;
    c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
    c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
    c->mb_decision= s->avctx->mb_decision;
    c->me_cmp= s->avctx->me_cmp;
    c->mb_cmp= s->avctx->mb_cmp;
    c->me_sub_cmp= s->avctx->me_sub_cmp;
    c->pix_fmt = PIX_FMT_YUV420P;
    c->time_base= s->avctx->time_base;
    c->max_b_frames= s->max_b_frames;

    if (avcodec_open(c, codec) < 0){
        /* do not leak the output buffer and codec context on failure */
        av_freep(&outbuf);
        av_freep(&c);
        return -1;
    }

    resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws

    /* downscale the next reference frame plus the queued input pictures */
    for(i=0; i<s->max_b_frames+2; i++){
        int ysize= c->width*c->height;
        int csize= (c->width/2)*(c->height/2);
        Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;

        if(pre_input_ptr)
            pre_input= *pre_input_ptr;

        /* guard on pre_input_ptr: pre_input is uninitialized when it is NULL */
        if(pre_input_ptr && pre_input.type != FF_BUFFER_TYPE_SHARED && i){
            pre_input.data[0]+=INPLACE_OFFSET;
            pre_input.data[1]+=INPLACE_OFFSET;
            pre_input.data[2]+=INPLACE_OFFSET;
        }

        avcodec_get_frame_defaults(&input[i]);
        input[i].data[0]= av_malloc(ysize + 2*csize);
        input[i].data[1]= input[i].data[0] + ysize;
        input[i].data[2]= input[i].data[1] + csize;
        input[i].linesize[0]= c->width;
        input[i].linesize[1]=
        input[i].linesize[2]= c->width/2;

        if(!i || s->input_picture[i-1])
            img_resample(resample, &input[i], &pre_input);
    }

    /* try each candidate B-frame run length j and measure its RD cost */
    for(j=0; j<s->max_b_frames+1; j++){
        int64_t rd=0;

        if(!s->input_picture[j])
            break;

        c->error[0]= c->error[1]= c->error[2]= 0;

        /* prime the mini-encoder with a cheap I frame (cost not counted) */
        input[0].pict_type= I_TYPE;
        input[0].quality= 1 * FF_QP2LAMBDA;
        out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
//        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;

        for(i=0; i<s->max_b_frames+1; i++){
            int is_p= i % (j+1) == j || i==s->max_b_frames;

            input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
            input[i+1].quality= is_p ? p_lambda : b_lambda;
            out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        /* get the delayed frames */
        while(out_size){
            out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        rd += c->error[0] + c->error[1] + c->error[2];

        if(rd < best_rd){
            best_rd= rd;
            best_b_count= j;
        }
    }

    av_freep(&outbuf);
    avcodec_close(c);
    av_freep(&c);
    img_resample_close(resample);

    for(i=0; i<s->max_b_frames+2; i++){
        av_freep(&input[i].data[0]);
    }

    return best_b_count;
}
2279

    
2280
static void select_input_picture(MpegEncContext *s){
2281
    int i;
2282

    
2283
    for(i=1; i<MAX_PICTURE_COUNT; i++)
2284
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2285
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2286

    
2287
    /* set next picture type & ordering */
2288
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2289
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2290
            s->reordered_input_picture[0]= s->input_picture[0];
2291
            s->reordered_input_picture[0]->pict_type= I_TYPE;
2292
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2293
        }else{
2294
            int b_frames;
2295

    
2296
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2297
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2298
                //FIXME check that te gop check above is +-1 correct
2299
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2300

    
2301
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2302
                        for(i=0; i<4; i++)
2303
                            s->input_picture[0]->data[i]= NULL;
2304
                        s->input_picture[0]->type= 0;
2305
                    }else{
2306
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2307
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2308

    
2309
                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2310
                    }
2311

    
2312
                    emms_c();
2313
                    ff_vbv_update(s, 0);
2314

    
2315
                    goto no_output_pic;
2316
                }
2317
            }
2318

    
2319
            if(s->flags&CODEC_FLAG_PASS2){
2320
                for(i=0; i<s->max_b_frames+1; i++){
2321
                    int pict_num= s->input_picture[0]->display_picture_number + i;
2322

    
2323
                    if(pict_num >= s->rc_context.num_entries)
2324
                        break;
2325
                    if(!s->input_picture[i]){
2326
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2327
                        break;
2328
                    }
2329

    
2330
                    s->input_picture[i]->pict_type=
2331
                        s->rc_context.entry[pict_num].new_pict_type;
2332
                }
2333
            }
2334

    
2335
            if(s->avctx->b_frame_strategy==0){
2336
                b_frames= s->max_b_frames;
2337
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
2338
            }else if(s->avctx->b_frame_strategy==1){
2339
                for(i=1; i<s->max_b_frames+1; i++){
2340
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2341
                        s->input_picture[i]->b_frame_score=
2342
                            get_intra_count(s, s->input_picture[i  ]->data[0],
2343
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
2344
                    }
2345
                }
2346
                for(i=0; i<s->max_b_frames+1; i++){
2347
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2348
                }
2349

    
2350
                b_frames= FFMAX(0, i-1);
2351

    
2352
                /* reset scores */
2353
                for(i=0; i<b_frames+1; i++){
2354
                    s->input_picture[i]->b_frame_score=0;
2355
                }
2356
            }else if(s->avctx->b_frame_strategy==2){
2357
                b_frames= estimate_best_b_count(s);
2358
            }else{
2359
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2360
                b_frames=0;
2361
            }
2362

    
2363
            emms_c();
2364
//static int b_count=0;
2365
//b_count+= b_frames;
2366
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2367

    
2368
            for(i= b_frames - 1; i>=0; i--){
2369
                int type= s->input_picture[i]->pict_type;
2370
                if(type && type != B_TYPE)
2371
                    b_frames= i;
2372
            }
2373
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2374
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2375
            }
2376

    
2377
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
2378
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2379
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
2380
              }else{
2381
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
2382
                    b_frames=0;
2383
                s->input_picture[b_frames]->pict_type= I_TYPE;
2384
              }
2385
            }
2386

    
2387
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2388
               && b_frames
2389
               && s->input_picture[b_frames]->pict_type== I_TYPE)
2390
                b_frames--;
2391

    
2392
            s->reordered_input_picture[0]= s->input_picture[b_frames];
2393
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2394
                s->reordered_input_picture[0]->pict_type= P_TYPE;
2395
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2396
            for(i=0; i<b_frames; i++){
2397
                s->reordered_input_picture[i+1]= s->input_picture[i];
2398
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2399
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2400
            }
2401
        }
2402
    }
2403
no_output_pic:
2404
    if(s->reordered_input_picture[0]){
2405
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2406

    
2407
        copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2408

    
2409
        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2410
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2411

    
2412
            int i= ff_find_unused_picture(s, 0);
2413
            Picture *pic= &s->picture[i];
2414

    
2415
            /* mark us unused / free shared pic */
2416
            for(i=0; i<4; i++)
2417
                s->reordered_input_picture[0]->data[i]= NULL;
2418
            s->reordered_input_picture[0]->type= 0;
2419

    
2420
            pic->reference              = s->reordered_input_picture[0]->reference;
2421

    
2422
            alloc_picture(s, pic, 0);
2423

    
2424
            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2425

    
2426
            s->current_picture_ptr= pic;
2427
        }else{
2428
            // input is not a shared pix -> reuse buffer for current_pix
2429

    
2430
            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2431
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2432

    
2433
            s->current_picture_ptr= s->reordered_input_picture[0];
2434
            for(i=0; i<4; i++){
2435
                s->new_picture.data[i]+= INPLACE_OFFSET;
2436
            }
2437
        }
2438
        copy_picture(&s->current_picture, s->current_picture_ptr);
2439

    
2440
        s->picture_number= s->new_picture.display_picture_number;
2441
//printf("dpn:%d\n", s->picture_number);
2442
    }else{
2443
       memset(&s->new_picture, 0, sizeof(Picture));
2444
    }
2445
}
2446

    
2447
/**
 * Encodes one video frame.
 * @param avctx codec context; per-frame statistics (header_bits, mv_bits, ...)
 *              are written back into it
 * @param buf output buffer for the coded bitstream
 * @param buf_size size of buf in bytes
 * @param data the input AVFrame to encode (may be delayed internally for
 *             B-frame reordering, in which case 0 bytes are produced)
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    /* this encoder path only handles 4:2:0 input */
    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* give each slice thread a share of the output buffer proportional to
       the number of macroblock rows it encodes */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* pick the next picture to code (handles B-frame reordering);
       s->new_picture.data[0] stays NULL while frames are still being buffered */
    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame statistics to the public codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* pad the frame with codec-specific stuffing if the rate controller
           asks for it (prevents VBV underflow) */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: emit the 0x000001C3 stuffing start code, then 0xFF bytes */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* splice the 16-bit vbv_delay into 3 bytes of the already-written
               picture header; s->vbv_delay_ptr presumably points at the header
               in the output bitstream — set while the header was written */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* nothing to output this call (frame buffered for reordering) */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2568

    
2569
#endif //CONFIG_ENCODERS
2570

    
2571
/**
 * Global motion compensation with a single warp point (MPEG-4 GMC, one
 * motion vector for the whole macroblock, taken from s->sprite_offset):
 * fills the luma block and — unless CODEC_FLAG_GRAY — both chroma blocks.
 * @param s encoder/decoder context
 * @param dest_y destination luma block (16x16)
 * @param dest_cb destination Cb block (8x8)
 * @param dest_cr destination Cr block (8x8)
 * @param ref_picture the three reference planes (Y, Cb, Cr)
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;  /* set when the Cb source went through the edge-emulation buffer */

    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    /* integer source position; the fractional remainder is applied by gmc1() */
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    /* rescale the offsets so the low 4 bits are a 1/16-pel fraction
       regardless of the configured warping accuracy */
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    /* fetch out-of-picture source data through the edge-replicating buffer */
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* true fractional offset: use the gmc1 interpolation filter on both
           8x16 halves of the luma block */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        /* offset is a multiple of 8/16-pel -> plain half-pel copy is enough */
        int dxy;

        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same procedure at half resolution with the chroma sprite offset */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        /* Cr uses the same offset as Cb, so it needs emulation iff Cb did */
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2656

    
2657
/**
 * Global motion compensation with the full affine warp (MPEG-4 GMC):
 * applies s->sprite_offset plus the 2x2 s->sprite_delta matrix to one
 * macroblock; chroma is skipped when CODEC_FLAG_GRAY is set.
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    const int a       = s->sprite_warping_accuracy;
    const int dxx     = s->sprite_delta[0][0];
    const int dxy     = s->sprite_delta[0][1];
    const int dyx     = s->sprite_delta[1][0];
    const int dyy     = s->sprite_delta[1][1];
    const int rounder = (1<<(2*a+1)) - s->no_rounding;
    int ox, oy;

    /* warped source origin for this macroblock's luma (16x16, done as two
       8-wide halves) */
    ox = s->sprite_offset[0][0] + dxx*s->mb_x*16 + dxy*s->mb_y*16;
    oy = s->sprite_offset[0][1] + dyx*s->mb_x*16 + dyy*s->mb_y*16;

    s->dsp.gmc(dest_y, ref_picture[0], s->linesize, 16,
           ox,
           oy,
           dxx, dxy,
           dyx, dyy,
           a+1, rounder,
           s->h_edge_pos, s->v_edge_pos);
    s->dsp.gmc(dest_y+8, ref_picture[0], s->linesize, 16,
           ox + dxx*8,
           oy + dyx*8,
           dxx, dxy,
           dyx, dyy,
           a+1, rounder,
           s->h_edge_pos, s->v_edge_pos);

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma origin: same warp at half resolution with the chroma offset */
    ox = s->sprite_offset[1][0] + dxx*s->mb_x*8 + dxy*s->mb_y*8;
    oy = s->sprite_offset[1][1] + dyx*s->mb_x*8 + dyy*s->mb_y*8;

    s->dsp.gmc(dest_cb, ref_picture[1], s->uvlinesize, 8,
           ox,
           oy,
           dxx, dxy,
           dyx, dyy,
           a+1, rounder,
           s->h_edge_pos>>1, s->v_edge_pos>>1);

    s->dsp.gmc(dest_cr, ref_picture[2], s->uvlinesize, 8,
           ox,
           oy,
           dxx, dxy,
           dyx, dyy,
           a+1, rounder,
           s->h_edge_pos>>1, s->v_edge_pos>>1);
}
2712

    
2713
/**
 * Copies a rectangular area of samples to a temporary buffer, replicating
 * the border samples for the parts of the block that fall outside the
 * source picture.
 * @param buf destination buffer
 * @param src source buffer, already offset to the nominal top-left of the block
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int top, left, bottom, right;

    /* clamp the block position so that at least one row/column of real
       samples overlaps the picture, adjusting src accordingly */
    if (src_y >= h) {
        src  += (h - 1 - src_y) * linesize;
        src_y = h - 1;
    } else if (src_y <= -block_h) {
        src  += (1 - block_h - src_y) * linesize;
        src_y = 1 - block_h;
    }
    if (src_x >= w) {
        src  += w - 1 - src_x;
        src_x = w - 1;
    } else if (src_x <= -block_w) {
        src  += 1 - block_w - src_x;
        src_x = 1 - block_w;
    }

    top    = -src_y > 0 ? -src_y : 0;                   /* rows above the picture      */
    left   = -src_x > 0 ? -src_x : 0;                   /* columns left of the picture */
    bottom = h - src_y < block_h ? h - src_y : block_h; /* first row past the picture  */
    right  = w - src_x < block_w ? w - src_x : block_w; /* first column past the picture */

    /* interior: copy the samples that really exist */
    for (y = top; y < bottom; y++) {
        for (x = left; x < right; x++) {
            buf[y*linesize + x] = src[y*linesize + x];
        }
    }

    /* replicate the first valid row upwards */
    for (y = 0; y < top; y++) {
        for (x = left; x < right; x++) {
            buf[y*linesize + x] = buf[top*linesize + x];
        }
    }

    /* replicate the last valid row downwards */
    for (y = bottom; y < block_h; y++) {
        for (x = left; x < right; x++) {
            buf[y*linesize + x] = buf[(bottom - 1)*linesize + x];
        }
    }

    /* replicate the outermost valid columns sideways (rows are complete now) */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < left; x++) {
            buf[y*linesize + x] = buf[y*linesize + left];
        }
        for (x = right; x < block_w; x++) {
            buf[y*linesize + x] = buf[y*linesize + right - 1];
        }
    }
}
2783

    
2784
/**
 * Half-pel motion compensation for one w x h block.
 * @return 1 if the source was fetched through the edge-emulation buffer,
 *         0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int emulated = 0;
    int dxy = ((motion_y & 1) << 1) | (motion_x & 1); /* half-pel sub-position */

    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* clamp to the picture; at the far edge drop the half-pel component
       so no sample beyond the last line/column is read */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        dxy &= ~1;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;

    src += src_y * stride + src_x;

    if (s->unrestricted_mv && (s->flags & CODEC_FLAG_EMU_EDGE)
        && (   (unsigned)src_x > h_edge_pos - (motion_x & 1) - w
            || (unsigned)src_y > v_edge_pos - (motion_y & 1) - h)) {
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w + 1, (h + 1) << field_based,
                            src_x, src_y << field_based, h_edge_pos, s->v_edge_pos);
        src = s->edge_emu_buffer;
        emulated = 1;
    }

    if (field_select)
        src += s->linesize;
    pix_op[dxy](dest, src, stride, h);
    return emulated;
}
2823

    
2824
/**
 * Motion compensation for one w x h block at reduced (lowres) resolution,
 * using the h264 chroma interpolation functions for the sub-pel part.
 * @return 1 if the source was fetched through the edge-emulation buffer,
 *         0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres= s->avctx->lowres;
    const int s_mask= (2<<lowres)-1;  /* mask of the fractional MV bits at this lowres level */
    int emu=0;
    int sx, sy;

    /* quarter-pel vectors are treated as half-pel here */
    if(s->quarter_sample){
        motion_x/=2;
        motion_y/=2;
    }

    /* split the vector into a sub-pel fraction (sx/sy) and an integer part */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x += motion_x >> (lowres+1);
    src_y += motion_y >> (lowres+1);

    src += src_y * stride + src_x;

    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src= s->edge_emu_buffer;
        emu=1;
    }

    /* scale the fraction to the 1/8-pel range expected by the chroma mc funcs */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    if(field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, sx, sy);
    return emu;
}
2865

    
2866
/**
 * Applies one mpeg motion vector to the three components (Y, Cb, Cr) of a
 * macroblock, handling the chroma sub-pel derivation for each output format
 * (H.263/H.261/MPEG 4:2:0, 4:2:2, 4:4:4) and edge emulation.
 * @param field_based nonzero for field (interlaced) prediction; doubles the
 *        effective linesize and halves the vertical block position
 * @param bottom_field write into the bottom field of the destination
 * @param field_select read from the bottom field of the reference
 * @param pix_op put/avg pixel function table, indexed [chroma_x_shift][dxy]
 * @param h height of the predicted block in lines
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* luma: half-pel sub-position and integer source position */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    /* derive the chroma sub-position and source; this differs per format */
    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* reproduce a known encoder bug: chroma rounded towards odd */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0: chroma subsampled in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* out-of-picture source: reject for MPEG-1/2, otherwise fetch through
       the edge-replicating buffer */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2982

    
2983
/* apply one mpeg motion vector to the three components */
/**
 * Lowres variant of mpeg_motion(): applies one motion vector to Y, Cb and Cr
 * at reduced resolution (s->avctx->lowres halvings), using the h264 chroma
 * interpolation functions for the sub-pel part.
 * @param pix_op chroma mc function table, indexed by block-size level
 * @param h height of the predicted block in (lowres) lines
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;       /* half the luma block size at this lowres level */
    const int s_mask= (2<<lowres)-1;    /* mask of the fractional MV bits */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    /* split the luma vector into sub-pel fraction and integer position */
    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* chroma position/fraction, per output format (see mpeg_motion) */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* out-of-picture source -> fetch through the edge-replicating buffer */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* scale the fractions to the 1/8-pel range of the chroma mc functions */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3079

    
3080
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Overlapped block motion compensation for one 8x8 block: each output pixel
 * is a fixed-weight average of the mid prediction and the predictions made
 * with the top/left/right/bottom neighbour vectors. The five weights of
 * every pixel sum to 8 (hence the +4 rounding and >>3 in the filter).
 * @param dst destination 8x8 block
 * @param src the five 8x8 source predictions: mid, top, left, right, bottom
 * @param stride line size of dst and of all source blocks
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* weighted average of the five predictions at offset x */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* the same filter applied to a 2x2 group of pixels */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    /* row 0; the OBMC_FILTER4 calls also cover row 1, columns 2-5 */
    x=0;
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    /* row 1, remaining columns 0-1 and 6-7 */
    x+= stride;
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    /* rows 2-3 */
    x+= stride;
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    /* rows 4-5 */
    x+= 2*stride;
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    /* row 6; the OBMC_FILTER4 calls also cover row 7, columns 2-5 */
    x+= 2*stride;
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    /* row 7, remaining columns 0-1 and 6-7 */
    x+= stride;
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
}
3131

    
3132
/* obmc for 1 8x8 luma block */
static inline void obmc_motion(MpegEncContext *s,
                               uint8_t *dest, uint8_t *src,
                               int src_x, int src_y,
                               op_pixels_func *pix_op,
                               int16_t mv[5][2]/* mid top left right bottom*/)
#define MID    0
{
    int i;
    uint8_t *ptr[5];

    /* OBMC is only used at half-pel resolution */
    assert(s->quarter_sample==0);

    /* Build the five 8x8 predictions.  If a neighbour's vector equals
     * the mid vector, reuse the already-computed mid prediction instead
     * of running hpel_motion again. */
    for(i=0; i<5; i++){
        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
            ptr[i]= ptr[MID];
        }else{
            /* each prediction gets its own 8x8 slot in the 16x16 scratchpad */
            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
            hpel_motion(s, ptr[i], src, 0, 0,
                        src_x, src_y,
                        s->width, s->height, s->linesize,
                        s->h_edge_pos, s->v_edge_pos,
                        8, 8, pix_op,
                        mv[i][0], mv[i][1]);
        }
    }

    /* weighted blend of the five predictions into dest */
    put_obmc(dest, ptr, s->linesize);
}
3161

    
3162
/**
 * Quarter-pel motion compensation of one macroblock (luma at qpel,
 * chroma at hpel), for frame or field based prediction.
 * Chroma vectors are derived from the luma vector, with optional
 * workarounds for buggy qpel-chroma encoders.
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* low 2 bits of the vector select among the 16 qpel interpolations */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    /* in field mode, lines are interleaved: double the strides */
    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* derive the chroma vector from the luma one */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round to half-pel, keeping the fractional bit sticky */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* if the reference area overlaps the picture edge, interpolate
     * from a padded copy in edge_emu_buffer instead */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    /* chroma uses plain half-pel MC */
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3248

    
3249
inline int ff_h263_round_chroma(int x){
3250
    if (x >= 0)
3251
        return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3252
    else {
3253
        x = -x;
3254
        return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3255
    }
3256
}
3257

    
3258
/**
 * h263 chroma 4mv motion compensation.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* half-pel flags go into dxy; mx/my become full-pel chroma offsets */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;          /* clamped to the right edge: drop horizontal half-pel */
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;          /* clamped to the bottom edge: drop vertical half-pel */

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* Cr uses the same offset; reuse the edge-emulation decision made for Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3306

    
3307
/**
 * Lowres variant of chroma 4MV motion compensation: block size and
 * subpel precision scale with s->avctx->lowres, interpolation is done
 * by the h264 chroma MC functions.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;                    /* chroma block size at this lowres */
    const int s_mask= (2<<lowres)-1;                 /* mask for the subpel fraction */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    /* reduce quarter-pel vectors to half-pel before the chroma rounding */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* rescale the subpel fraction to the 1/8-pel range expected by pix_op */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* Cr uses the same offset; reuse the edge-emulation decision made for Cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3356

    
3357
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    /* OBMC path: build a 4x4 cache of neighbouring 8x8 vectors around
     * the current macroblock, then run obmc_motion per 8x8 block. */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        /* rows 1-2: the current MB's own vectors; row 3 duplicates row 2
         * (the row below is not available yet) */
        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* row 0: the MB above, or a copy of our top row at the picture
         * edge / next to intra MBs */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* column 0: the MB to the left, with the same fallback rule */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* column 3: the MB to the right, with the same fallback rule */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* mid, top, left, right, bottom vectors for this 8x8 block */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            /* accumulate for the single chroma vector */
            mx += mv[0][0];
            my += mv[0][1];
        }
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture referencing a field of the current frame */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3605

    
3606
/**
 * motion compensation of a single macroblock, lowres variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   /* 8x8-block size at this lowres level */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        /* four luma blocks; chroma gets one vector summed from them */
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture referencing a field of the current frame */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3731

    
3732
/* put block[] to dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    /* dequantize block i in place (intra quantizer), then overwrite
     * dest with its inverse DCT */
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3739

    
3740
/* add block[] to dest[] */
3741
static inline void add_dct(MpegEncContext *s,
3742
                           DCTELEM *block, int i, uint8_t *dest, int line_size)
3743
{
3744
    if (s->block_last_index[i] >= 0) {
3745
        s->dsp.idct_add (dest, line_size, block);
3746
    }
3747
}
3748

    
3749
static inline void add_dequant_dct(MpegEncContext *s,
3750
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3751
{
3752
    if (s->block_last_index[i] >= 0) {
3753
        s->dct_unquantize_inter(s, block, i, qscale);
3754

    
3755
        s->dsp.idct_add (dest, line_size, block);
3756
    }
3757
}
3758

    
3759
/**
 * cleans dc, ac, coded_block for the current non intra MB
 */
void ff_clean_intra_table_entries(MpegEncContext *s)
{
    int wrap = s->b8_stride;
    int xy = s->block_index[0];

    /* reset the four luma DC predictors of this MB to the default */
    s->dc_val[0][xy           ] =
    s->dc_val[0][xy + 1       ] =
    s->dc_val[0][xy     + wrap] =
    s->dc_val[0][xy + 1 + wrap] = 1024;
    /* ac pred */
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
    if (s->msmpeg4_version>=3) {
        s->coded_block[xy           ] =
        s->coded_block[xy + 1       ] =
        s->coded_block[xy     + wrap] =
        s->coded_block[xy + 1 + wrap] = 0;
    }
    /* chroma */
    wrap = s->mb_stride;
    xy = s->mb_x + s->mb_y * wrap;
    s->dc_val[1][xy] =
    s->dc_val[2][xy] = 1024;
    /* ac pred */
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));

    /* mark this MB as no longer intra */
    s->mbintra_table[xy]= 0;
}
3791

    
3792
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   It performs the pixel-domain reconstruction of one macroblock:
   motion compensation (decode side only) plus inverse quant / IDCT of
   the residual, writing into s->dest[] (or a scratchpad if the output
   is not directly readable).

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)
 */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients (in un-permutated scan order) for debugging */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            /* h263-style prediction: only reset if the co-located MB of the
               previous pass was intra (tracked via mbintra_table) */
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* reconstruct pixels unless we are encoding without needing them
       (PSNR off, intra-only or B frame) */
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride,
           second field starts one line down */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* reconstruct into the scratchpad, copied out after skip_idct */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                        op_qpix= s->dsp.put_qpel_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                    }
                    /* forward prediction writes, backward averages on top
                       when both directions are used (bi-prediction) */
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->dsp.avg_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                /* dequant is still pending for these codecs: dequant+idct+add */
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                /* coefficients are already dequantized: idct+add only */
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            }else{
                /* mpeg1/2: already dequantized, plain idct_put */
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{

                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        /* if we rendered into the scratchpad, copy the MB out to the real
           destination now */
        if(!readable){
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4025

    
4026
/**
 * Reconstruct one macroblock. Thin dispatcher: the lowres flag is passed
 * as a compile-time constant in each branch so the always_inline worker
 * is specialized separately for the lowres and full-resolution paths.
 */
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
    if (s->avctx->lowres)
        MPV_decode_mb_internal(s, block, 1);
    else
        MPV_decode_mb_internal(s, block, 0);
}
4030

    
4031
#ifdef CONFIG_ENCODERS
4032

    
4033
/**
 * Zero out block n entirely if it contains only a few "cheap" +-1
 * coefficients: each +-1 is scored by the zero-run preceding it and the
 * block is killed when the total score stays below the threshold.
 * A negative threshold means the DC coefficient may be eliminated too.
 */
static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
    /* cost of a +-1 coefficient as a function of the preceding zero-run */
    static const char run_weight[64]= {
        3,2,2,1,1,1,1,1,
        1,1,1,1,1,1,1,1,
        1,1,1,1,1,1,1,1,
        0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0
    };
    DCTELEM *const coeffs = s->block[n];
    const int last = s->block_last_index[n];
    int skip_dc = 1;
    int total = 0;
    int zero_run = 0;
    int idx;

    if (threshold < 0) {
        threshold = -threshold;
        skip_dc = 0;   /* DC participates in elimination too */
    }

    /* everything we are allowed to zero is already zero? */
    if (last <= skip_dc - 1)
        return;

    for (idx = 0; idx <= last; idx++) {
        const int pos = s->intra_scantable.permutated[idx];
        const int mag = ABS(coeffs[pos]);

        if (mag > 1)
            return;                 /* any coefficient > 1: keep the block */
        if (mag == 0) {
            zero_run++;
        } else if (!(skip_dc && idx == 0)) {
            total += run_weight[zero_run];
            zero_run = 0;
        }
    }

    if (total >= threshold)
        return;

    for (idx = skip_dc; idx <= last; idx++)
        coeffs[s->intra_scantable.permutated[idx]] = 0;

    /* keep index 0 if a (skipped) DC survived, else mark block empty */
    s->block_last_index[n] = coeffs[0] ? 0 : -1;
}
4081

    
4082
/**
 * Clamp the quantized coefficients of one block into the codec's legal
 * range [s->min_qcoeff, s->max_qcoeff]; the intra DC coefficient is left
 * untouched. Logs a warning when anything was clipped (simple MB decision
 * mode only).
 */
static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
{
    const int hi = s->max_qcoeff;
    const int lo = s->min_qcoeff;
    int clipped = 0;
    int idx = s->mb_intra ? 1 : 0;   /* skip clipping of intra dc */

    for (; idx <= last_index; idx++) {
        const int pos = s->intra_scantable.permutated[idx];
        const int v = block[pos];

        if (v > hi) {
            block[pos] = hi;
            clipped++;
        } else if (v < lo) {
            block[pos] = lo;
            clipped++;
        }
    }

    if (clipped && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
        av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", clipped, lo, hi);
}
4112

    
4113
#endif //CONFIG_ENCODERS
4114

    
4115
/**
4116
 *
4117
 * @param h is the normal height, this will be reduced automatically if needed for the last row
4118
 */
4119
void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4120
    if (s->avctx->draw_horiz_band) {
4121
        AVFrame *src;
4122
        int offset[4];
4123

    
4124
        if(s->picture_structure != PICT_FRAME){
4125
            h <<= 1;
4126
            y <<= 1;
4127
            if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4128
        }
4129

    
4130
        h= FFMIN(h, s->avctx->height - y);
4131

    
4132
        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4133
            src= (AVFrame*)s->current_picture_ptr;
4134
        else if(s->last_picture_ptr)
4135
            src= (AVFrame*)s->last_picture_ptr;
4136
        else
4137
            return;
4138

    
4139
        if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4140
            offset[0]=
4141
            offset[1]=
4142
            offset[2]=
4143
            offset[3]= 0;
4144
        }else{
4145
            offset[0]= y * s->linesize;;
4146
            offset[1]=
4147
            offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4148
            offset[3]= 0;
4149
        }
4150

    
4151
        emms_c();
4152

    
4153
        s->avctx->draw_horiz_band(s->avctx, src, offset,
4154
                                  y, s->picture_structure, h);
4155
    }
4156
}
4157

    
4158
/**
 * Set up s->block_index[] (8x8 block coordinates of the current MB in the
 * b8 grid) and s->dest[] (pixel pointers for the current MB in the current
 * picture) from s->mb_x / s->mb_y.
 */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize   = s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize = s->current_picture.linesize[1];
    const int mb_size    = 4 - s->avctx->lowres;
    const int luma_row0  = s->b8_stride *  s->mb_y * 2      + s->mb_x * 2;
    const int luma_row1  = s->b8_stride * (s->mb_y * 2 + 1) + s->mb_x * 2;
    const int chroma_base= s->b8_stride * s->mb_height * 2 + s->mb_x - 1;

    /* luma: two 8x8 blocks per row, offsets -2/-1 from the MB start */
    s->block_index[0] = luma_row0 - 2;
    s->block_index[1] = luma_row0 - 1;
    s->block_index[2] = luma_row1 - 2;
    s->block_index[3] = luma_row1 - 1;
    /* chroma (Cb/Cr) entries are stored after all luma b8 entries */
    s->block_index[4] = s->mb_stride * (s->mb_y + 1)                + chroma_base;
    s->block_index[5] = s->mb_stride * (s->mb_y + s->mb_height + 2) + chroma_base;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4182

    
4183
#ifdef CONFIG_ENCODERS
4184

    
4185
/**
 * Fill weight[] (8x8) with a per-pixel visual activity measure: for each
 * pixel, 36*sqrt(n*sum(v^2) - sum(v)^2)/n over its 3x3 neighbourhood
 * clipped to the block. Flat areas yield small weights, textured large.
 */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
//FIXME optimize
    int px, py;

    for(py=0; py<8; py++){
        for(px=0; px<8; px++){
            const int y_lo= FFMAX(py-1, 0), y_hi= FFMIN(8, py+2);
            const int x_lo= FFMAX(px-1, 0), x_hi= FFMIN(8, px+2);
            int acc=0, acc2=0, n=0;
            int nx, ny;

            for(ny= y_lo; ny < y_hi; ny++){
                for(nx= x_lo; nx < x_hi; nx++){
                    const int pix= ptr[nx + ny*stride];
                    acc  += pix;
                    acc2 += pix*pix;
                    n++;
                }
            }
            weight[px + 8*py]= (36*ff_sqrt(n*acc2 - acc*acc)) / n;
        }
    }
}
4207

    
4208
/**
 * Encode one macroblock: (re)compute qscale for adaptive quant, fetch the
 * source pixels (edge-emulated at picture borders), do motion compensation
 * for inter MBs, forward DCT + quantization with optional noise shaping and
 * coefficient elimination, then entropy-code via the codec-specific backend.
 *
 * @param motion_x horizontal motion vector component passed to the backend
 * @param motion_y vertical motion vector component passed to the backend
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    /* per-MB quantizer update (adaptive quant / QP rate-distortion) */
    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    /* mpeg4 restrictions on dquant for inter MBs:
                       even dquant in B frames, none for direct/8x8 MC */
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixel pointers for this MB in the picture being encoded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* MB sticks out of the picture: read through the edge emulation buffer */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        /* decide frame vs field DCT by comparing ildct costs (intra path:
           cost of the source pixels themselves, second argument NULL) */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    /* switch luma blocks 2/3 to the second field */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* motion-compensated prediction; backward averages on top of
           forward when bidirectional */
        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        /* frame vs field DCT decision on the residual (source vs prediction) */
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* skip the DCT for blocks whose residual SAD is small relative to
           the quantizer -- they would quantize to (nearly) nothing anyway */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    /* noise shaping needs per-pixel weights and the unquantized coefficients */
    if(s->avctx->quantizer_noise_shaping){
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop blocks containing only cheap +-1 coefficients (inter only) */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* grayscale-only intra: force neutral chroma DC blocks */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4469

    
4470
#endif //CONFIG_ENCODERS
4471

    
4472
/**
 * Flush decoder state: release every internally- or user-allocated picture
 * buffer and reset macroblock position, bitstream parser state and the
 * bitstream buffer.
 */
void ff_mpeg_flush(AVCodecContext *avctx){
    MpegEncContext *s = avctx->priv_data;
    int i;

    if(!s || !s->picture)
        return;

    for(i=0; i<MAX_PICTURE_COUNT; i++){
        Picture *pic= &s->picture[i];
        if(pic->data[0] && (   pic->type == FF_BUFFER_TYPE_INTERNAL
                            || pic->type == FF_BUFFER_TYPE_USER))
            avctx->release_buffer(avctx, (AVFrame*)pic);
    }
    s->current_picture_ptr =
    s->last_picture_ptr    =
    s->next_picture_ptr    = NULL;

    s->mb_x=
    s->mb_y= 0;

    /* reset the start-code parser */
    s->parse_context.state= -1;
    s->parse_context.frame_start_found= 0;
    s->parse_context.overread= 0;
    s->parse_context.overread_index= 0;
    s->parse_context.index= 0;
    s->parse_context.last_index= 0;
    s->bitstream_buffer_size=0;
}
4496

    
4497
#ifdef CONFIG_ENCODERS
4498
/**
 * Append "length" bits from src to the bit writer pb.
 *
 * Three paths: short runs and byte-misaligned writers copy 16 bits at a
 * time through put_bits(); long runs on a byte-aligned writer first align
 * to 32 bits, flush, then memcpy the bulk directly into the output buffer.
 * The trailing (length&15) bits are always written via put_bits().
 *
 * NOTE(review): the final put_bits reads srcw[words] even when bits==0,
 * i.e. one uint16 past the copied data -- presumably src is always padded;
 * confirm against callers.
 */
void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
{
    const uint16_t *srcw= (uint16_t*)src;
    int words= length>>4;
    int bits= length&15;
    int i;

    if(length==0) return;

    if(words < 16){
        /* too short for the memcpy path to pay off */
        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
    }else if(put_bits_count(pb)&7){
        /* writer is not byte aligned: must go bit by bit (16 at a time) */
        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
    }else{
        /* byte aligned: pad to a 32-bit boundary, flush, then bulk copy */
        for(i=0; put_bits_count(pb)&31; i++)
            put_bits(pb, 8, src[i]);
        flush_put_bits(pb);
        memcpy(pbBufPtr(pb), src+i, 2*words-i);
        skip_put_bytes(pb, 2*words-i);
    }

    /* remaining 0..15 bits */
    put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
}
4521

    
4522
/**
 * Seed a trial-encode context d from s before encoding a macroblock:
 * copies predictor state (last MVs, DC predictors, skip run), the bit
 * statistics counters, and the current quantizer settings.
 */
static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
    /* motion vector predictors */
    memcpy(d->last_mv, s->last_mv, sizeof(s->last_mv[0][0][0])*2*2*2); //FIXME is memcpy faster then a loop?

    /* mpeg1 predictor state */
    d->mb_skip_run= s->mb_skip_run;
    d->last_dc[0]= s->last_dc[0];
    d->last_dc[1]= s->last_dc[1];
    d->last_dc[2]= s->last_dc[2];

    /* statistics */
    d->mv_bits   = s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count   = s->i_count;
    d->f_count   = s->f_count;
    d->b_count   = s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits = s->misc_bits;
    d->last_bits = 0;

    d->mb_skipped= 0;
    d->qscale= s->qscale;
    d->dquant= s->dquant;
}
4547

    
4548
/**
 * Record the outcome of a trial encode: copy back from s into d the MVs,
 * predictor state, statistics, macroblock mode, the PutBitContexts that
 * received the bits, and the coefficient block state.
 */
static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int n;

    memcpy(d->mv,      s->mv,      2*4*2*sizeof(int));
    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 predictor state */
    d->mb_skip_run= s->mb_skip_run;
    d->last_dc[0]= s->last_dc[0];
    d->last_dc[1]= s->last_dc[1];
    d->last_dc[2]= s->last_dc[2];

    /* statistics */
    d->mv_bits   = s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count   = s->i_count;
    d->f_count   = s->f_count;
    d->b_count   = s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits = s->misc_bits;

    /* chosen macroblock mode */
    d->mb_intra  = s->mb_intra;
    d->mb_skipped= s->mb_skipped;
    d->mv_type   = s->mv_type;
    d->mv_dir    = s->mv_dir;

    /* bitstream writers holding the trial's output */
    d->pb= s->pb;
    if(s->data_partitioning){
        d->pb2   = s->pb2;
        d->tex_pb= s->tex_pb;
    }

    d->block= s->block;
    for(n=0; n<6; n++)
        d->block_last_index[n]= s->block_last_index[n];
    d->interlaced_dct= s->interlaced_dct;
    d->qscale= s->qscale;
}
4584

    
4585
/**
 * Trial-encode one macroblock with a candidate mode/motion and keep it if
 * it beats the best score so far (rate, or rate-distortion when
 * mb_decision==FF_MB_DECISION_RD).
 *
 * Double-buffering: *next_block (0 or 1) selects which of the two
 * block/PutBitContext sets receives this trial; it is toggled only when the
 * trial wins, so the winning bits stay untouched by the next trial.
 *
 * @param s          working context (clobbered by the trial)
 * @param backup     context state to restore before encoding
 * @param best       receives the context of the winning trial
 * @param type       candidate macroblock type
 * @param pb,pb2,tex_pb  two sets of bit writers (double buffer)
 * @param dmin       in/out: best score so far
 * @param next_block in/out: index of the buffer to use for the next trial
 * @param motion_x,motion_y  candidate motion vector
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    int score;
    uint8_t *dest_backup[3];

    /* restore predictor/statistics state saved before the first trial */
    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    if(*next_block){
        /* buffer 1: reconstruct into the scratchpad so the already-accepted
         * pixels of buffer 0 in s->dest are not overwritten */
        memcpy(dest_backup, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate in bits, summed over all partitions */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* full RD: decode the trial and add the lambda-weighted distortion */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(*next_block){
        memcpy(s->dest, dest_backup, sizeof(s->dest));
    }

    if(score<*dmin){
        *dmin= score;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
4635

    
4636
/**
 * Sum of squared errors between two w x h pixel blocks.
 * 16x16 and 8x8 blocks go through the optimized dsputil routines;
 * anything else (partial edge blocks) uses a generic scalar loop.
 */
static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
    uint32_t *sq = squareTbl + 256;
    int x, y;
    int sum= 0;

    if(w==16 && h==16)
        return s->dsp.sse[0](NULL, src1, src2, stride, 16);
    if(w==8 && h==8)
        return s->dsp.sse[1](NULL, src1, src2, stride, 8);

    /* generic fallback: table lookup squares the signed difference */
    for(y=0; y<h; y++){
        uint8_t *row1= src1 + y*stride;
        uint8_t *row2= src2 + y*stride;

        for(x=0; x<w; x++)
            sum += sq[row1[x] - row2[x]];
    }

    assert(sum>=0);

    return sum;
}
4656

    
4657
static int sse_mb(MpegEncContext *s){
4658
    int w= 16;
4659
    int h= 16;
4660

    
4661
    if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
4662
    if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
4663

    
4664
    if(w==16 && h==16)
4665
      if(s->avctx->mb_cmp == FF_CMP_NSSE){
4666
        return  s->dsp.nsse[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4667
               +s->dsp.nsse[1](s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[1], s->uvlinesize, 8)
4668
               +s->dsp.nsse[1](s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], s->uvlinesize, 8);
4669
      }else{
4670
        return  s->dsp.sse[0](NULL, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16, s->dest[0], s->linesize, 16)
4671
               +s->dsp.sse[1](NULL, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[<