Statistics
| Branch: | Revision:

ffmpeg / libavcodec / mpegvideo.c @ c51ba67a

History | View | Annotate | Download (243 KB)

1
/*
2
 * The simplest mpeg encoder (well, it was the simplest!)
3
 * Copyright (c) 2000,2001 Fabrice Bellard.
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 *
20
 * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
21
 */
22

    
23
/**
24
 * @file mpegvideo.c
25
 * The simplest mpeg encoder (well, it was the simplest!).
26
 */
27

    
28
#include "avcodec.h"
29
#include "dsputil.h"
30
#include "mpegvideo.h"
31
#include "faandct.h"
32
#include <limits.h>
33

    
34
#ifdef USE_FASTMEMCPY
35
#include "fastmemcpy.h"
36
#endif
37

    
38
//#undef NDEBUG
39
//#include <assert.h>
40

    
41
#ifdef CONFIG_ENCODERS
42
static void encode_picture(MpegEncContext *s, int picture_number);
43
#endif //CONFIG_ENCODERS
44
static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
45
                                   DCTELEM *block, int n, int qscale);
46
static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
47
                                   DCTELEM *block, int n, int qscale);
48
static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
49
                                   DCTELEM *block, int n, int qscale);
50
static void dct_unquantize_mpeg2_intra_bitexact(MpegEncContext *s,
51
                                   DCTELEM *block, int n, int qscale);
52
static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
53
                                   DCTELEM *block, int n, int qscale);
54
static void dct_unquantize_h263_intra_c(MpegEncContext *s,
55
                                  DCTELEM *block, int n, int qscale);
56
static void dct_unquantize_h263_inter_c(MpegEncContext *s,
57
                                  DCTELEM *block, int n, int qscale);
58
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
59
#ifdef CONFIG_ENCODERS
60
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
61
static int dct_quantize_trellis_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);
62
static int dct_quantize_refine(MpegEncContext *s, DCTELEM *block, int16_t *weight, DCTELEM *orig, int n, int qscale);
63
static int sse_mb(MpegEncContext *s);
64
static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block);
65
#endif //CONFIG_ENCODERS
66

    
67
#ifdef HAVE_XVMC
68
extern int  XVMC_field_start(MpegEncContext*s, AVCodecContext *avctx);
69
extern void XVMC_field_end(MpegEncContext *s);
70
extern void XVMC_decode_mb(MpegEncContext *s);
71
#endif
72

    
73
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, int w)= draw_edges_c;
74

    
75

    
76
/* enable all paranoid tests for rounding, overflows, etc... */
77
//#define PARANOID
78

    
79
//#define DEBUG
80

    
81

    
82
/* for jpeg fast DCT */
83
#define CONST_BITS 14
84

    
85
static const uint16_t aanscales[64] = {
86
    /* precomputed values scaled up by 14 bits */
87
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
88
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
89
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
90
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
91
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
92
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
93
    8867 , 12299, 11585, 10426,  8867,  6967,  4799,  2446,
94
    4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
95
};
96

    
97
static const uint8_t h263_chroma_roundtab[16] = {
98
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
99
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
100
};
101

    
102
static const uint8_t ff_default_chroma_qscale_table[32]={
103
//  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
104
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
105
};
106

    
107
#ifdef CONFIG_ENCODERS
108
static uint8_t (*default_mv_penalty)[MAX_MV*2+1]=NULL;
109
static uint8_t default_fcode_tab[MAX_MV*2+1];
110

    
111
enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1};
112

    
113
/**
 * Precomputes reciprocal quantization multiplier tables for every qscale
 * in [qmin, qmax] so the quantizer can multiply instead of divide.
 *
 * The fixed-point scaling depends on which forward DCT is in use:
 *  - jpeg_fdct_islow (or postscaled FAAN): plain reciprocal at QMAT_SHIFT
 *  - fdct_ifast (or non-postscaled FAAN): reciprocal with the AAN
 *    postscale factors (aanscales) folded in
 *  - otherwise: reciprocal at QMAT_SHIFT plus the 16-bit qmat16 tables
 *    (multiplier + rounding bias) used by the MMX quantizer
 *
 * Afterwards the worst-case coefficient*multiplier products are checked
 * against INT_MAX and a warning is logged if overflow is possible.
 *
 * @param qmat         output: per-qscale 32-bit multiplier tables
 * @param qmat16       output: per-qscale 16-bit multiplier/bias tables
 *                     (only filled in the generic-DCT branch)
 * @param quant_matrix source quantization matrix (raster order)
 * @param bias         rounding bias, in QUANT_BIAS_SHIFT fixed point
 * @param intra        1 to skip the DC coefficient in the overflow check
 */
static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[2][64],
                           const uint16_t *quant_matrix, int bias, int qmin, int qmax, int intra)
{
    int qscale;
    int shift=0;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (dsp->fdct == ff_jpeg_fdct_islow
#ifdef FAAN_POSTSCALE
            || dsp->fdct == ff_faandct
#endif
            ) {
            for(i=0;i<64;i++) {
                /* j maps raster order to the IDCT's permuted order */
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                (qscale * quant_matrix[j]));
            }
        } else if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                /* extra +14 bits of precision compensate for the aanscales factor */
                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                const int j= dsp->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j]));
//                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                /* keep the 16-bit multiplier in range (0 and 128*256 would misbehave) */
                if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1;
                qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
            }
        }

        /* determine how many shift bits would be needed to keep the worst-case
           coefficient * multiplier product within a 32-bit int */
        for(i=intra; i<64; i++){
            int64_t max= 8191;
            if (dsp->fdct == fdct_ifast
#ifndef FAAN_POSTSCALE
                   || dsp->fdct == ff_faandct
#endif
                   ) {
                max= (8191LL*aanscales[i]) >> 14;
            }
            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                shift++;
            }
        }
    }
    if(shift){
        av_log(NULL, AV_LOG_INFO, "Warning, QMAT_SHIFT is larger then %d, overflows possible\n", QMAT_SHIFT - shift);
    }
}
186

    
187
/**
 * Derives the quantizer scale from the current lambda (clipped to the
 * configured qmin..qmax range) and refreshes the cached lambda2 value.
 */
static inline void update_qscale(MpegEncContext *s){
    const int raw_qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);

    s->qscale= clip(raw_qscale, s->avctx->qmin, s->avctx->qmax);
    s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
}
193
#endif //CONFIG_ENCODERS
194

    
195
/**
 * Initializes a ScanTable: applies the IDCT coefficient permutation to
 * the source scantable and precomputes, for each scan position, the
 * highest permuted index seen so far (raster_end).
 */
void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
    int idx, highest;

    st->scantable= src_scantable;

    /* permute the scan order for the active IDCT */
    for(idx=0; idx<64; idx++){
        const int pos= src_scantable[idx];
        st->permutated[idx]= permutation[pos];
#ifdef ARCH_POWERPC
        st->inverse[pos]= idx;
#endif
    }

    /* running maximum of permuted positions, used to find the last
       nonzero coefficient in raster order */
    highest= -1;
    for(idx=0; idx<64; idx++){
        const int pos= st->permutated[idx];
        if(pos > highest)
            highest= pos;
        st->raster_end[idx]= highest;
    }
}
218

    
219
#ifdef CONFIG_ENCODERS
220
/**
 * Writes a quantization matrix to the bitstream: a '1' flag followed by
 * the 64 coefficients in zigzag order, or a lone '0' flag when no custom
 * matrix is present.
 */
void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
    int i;

    if(!matrix){
        put_bits(pb, 1, 0);
        return;
    }

    put_bits(pb, 1, 1);
    for(i=0; i<64; i++)
        put_bits(pb, 8, matrix[ ff_zigzag_direct[i] ]);
}
231
#endif //CONFIG_ENCODERS
232

    
233
/**
 * Scans [p, end) for an MPEG start code prefix (00 00 01 xx).
 *
 * *state holds the last 4 bytes seen as a big-endian 32-bit window and
 * persists across calls, so a start code split between two buffers is
 * still detected.
 *
 * @return pointer just past the byte following the 00 00 01 prefix, or
 *         end if no start code was found in this buffer
 */
const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
    int i;

    assert(p<=end);
    if(p>=end)
        return end;

    /* first feed up to 3 bytes through *state to catch a start code that
       straddles the previous buffer boundary */
    for(i=0; i<3; i++){
        uint32_t tmp= *state << 8;
        *state= tmp + *(p++);
        if(tmp == 0x100 || p==end)
            return p;
    }

    /* main scan: advance by 1..3 bytes depending on which trailing bytes
       already rule out a 00 00 01 pattern ending at p */
    while(p<end){
        if     (p[-1] > 1      ) p+= 3;
        else if(p[-2]          ) p+= 2;
        else if(p[-3]|(p[-1]-1)) p++;
        else{
            p++;
            break;
        }
    }

    /* reload the sliding window from the 4 bytes ending at the match
       (or at end, if we ran off the buffer) */
    p= FFMIN(p, end)-4;
    *state=  be2me_32(unaligned32(p));

    return p+4;
}
262

    
263
/**
 * Initializes the DCT/quantization function pointers shared by encoder
 * and decoder: installs the C reference implementations, lets the
 * platform-specific init routines override them, and sets up the
 * (possibly permuted) scantables.
 *
 * @return 0 (always succeeds)
 */
int DCT_common_init(MpegEncContext *s)
{
    /* C reference unquantizers; platform init below may replace them */
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c;
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c;
    s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c;
    s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c;
    s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c;
    if(s->flags & CODEC_FLAG_BITEXACT)
        s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_bitexact;
    s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c;

#ifdef CONFIG_ENCODERS
    s->dct_quantize= dct_quantize_c;
    s->denoise_dct= denoise_dct_c;
#endif //CONFIG_ENCODERS

    /* per-architecture overrides of the function pointers set above */
#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

#ifdef CONFIG_ENCODERS
    /* keep the fast quantizer reachable even if trellis replaces the default */
    s->fast_dct_quantize= s->dct_quantize;

    if(s->flags&CODEC_FLAG_TRELLIS_QUANT){
        s->dct_quantize= dct_quantize_trellis_c; //move before MPV_common_init_*
    }

#endif //CONFIG_ENCODERS

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    if(s->alternate_scan){
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
    }else{
        ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
        ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
    }
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
323

    
324
/**
 * Shallow-copies a Picture and tags the destination as a copy so the
 * shared buffers are not released twice.
 */
static void copy_picture(Picture *dst, Picture *src){
    memcpy(dst, src, sizeof(*dst));
    dst->type= FF_BUFFER_TYPE_COPY;
}
328

    
329
/**
 * Copies frame-level attributes from src to dst and, when
 * avctx->me_threshold is set, also copies the motion/macroblock side
 * data (mb_type, motion_val, ref_index) that the encoder reads in that
 * mode; missing side data is reported but not treated as fatal.
 */
static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *src){
    int i;

    dst->pict_type              = src->pict_type;
    dst->quality                = src->quality;
    dst->coded_picture_number   = src->coded_picture_number;
    dst->display_picture_number = src->display_picture_number;
//    dst->reference              = src->reference;
    dst->pts                    = src->pts;
    dst->interlaced_frame       = src->interlaced_frame;
    dst->top_field_first        = src->top_field_first;

    if(s->avctx->me_threshold){
        /* the side data is only diagnosed, not required -- copies below
           are skipped for any table that is absent or aliased */
        if(!src->motion_val[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_val not set!\n");
        if(!src->mb_type)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.mb_type not set!\n");
        if(!src->ref_index[0])
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.ref_index not set!\n");
        if(src->motion_subsample_log2 != dst->motion_subsample_log2)
            av_log(s->avctx, AV_LOG_ERROR, "AVFrame.motion_subsample_log2 doesn't match! (%d!=%d)\n",
            src->motion_subsample_log2, dst->motion_subsample_log2);

        memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));

        for(i=0; i<2; i++){
            /* motion_val dimensions follow the source's subsampling */
            int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
            int height= ((16*s->mb_height)>>src->motion_subsample_log2);

            /* only copy when the tables exist and are not the same buffer */
            if(src->motion_val[i] && src->motion_val[i] != dst->motion_val[i]){
                memcpy(dst->motion_val[i], src->motion_val[i], 2*stride*height*sizeof(int16_t));
            }
            if(src->ref_index[i] && src->ref_index[i] != dst->ref_index[i]){
                memcpy(dst->ref_index[i], src->ref_index[i], s->b8_stride*2*s->mb_height*sizeof(int8_t));
            }
        }
    }
}
367

    
368
/**
 * Allocates a Picture.
 * The pixels are allocated/set by calling get_buffer() if shared=0.
 * On first use the per-picture side tables (qscale, mb_type, motion
 * vectors, pan-scan, ...) are allocated as well, sized per the current
 * stride/height; the picture-age tracking for skipped-MB reuse is
 * updated on every call.
 * @return 0 on success, -1 on failure (allocations freed later via
 *         the normal teardown path)
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
    const int big_mb_num= s->mb_stride*(s->mb_height+1) + 1; //the +1 is needed so memset(,,stride*height) doesnt sig11
    const int mb_array_size= s->mb_stride*s->mb_height;
    const int b8_array_size= s->b8_stride*s->mb_height*2;
    const int b4_array_size= s->b4_stride*s->mb_height*4;
    int i;

    if(shared){
        /* caller owns the pixel buffers; just mark the picture shared */
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
            return -1;
        }

        /* strides must stay constant once established */
        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    /* first use of this Picture: allocate the side tables */
    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
            CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(pic->mb_type_base , big_mb_num    * sizeof(uint32_t))
        pic->mb_type= pic->mb_type_base + s->mb_stride+1;
        if(s->out_format == FMT_H264){
            /* H.264 stores motion vectors at 4x4-block granularity */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b4_array_size+4)  * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 2;
        }else if(s->out_format == FMT_H263 || s->encoding || (s->avctx->debug&FF_DEBUG_MV) || (s->avctx->debug_mv)){
            /* 8x8-block granularity for H.263-family, encoding and MV debug */
            for(i=0; i<2; i++){
                CHECKED_ALLOCZ(pic->motion_val_base[i], 2 * (b8_array_size+4) * sizeof(int16_t))
                pic->motion_val[i]= pic->motion_val_base[i]+4;
                CHECKED_ALLOCZ(pic->ref_index[i], b8_array_size * sizeof(uint8_t))
            }
            pic->motion_subsample_log2= 3;
        }
        if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
            CHECKED_ALLOCZ(pic->dct_coeff, 64 * mb_array_size * sizeof(DCTELEM)*6)
        }
        pic->qstride= s->mb_stride;
        CHECKED_ALLOCZ(pic->pan_scan , 1 * sizeof(AVPanScan))
    }

    //it might be nicer if the application would keep track of these but it would require a API change
    memmove(s->prev_pict_types+1, s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE-1);
    s->prev_pict_types[0]= s->pict_type;
    if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
        pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
452

    
453
/**
 * Deallocates a picture: returns the pixel buffer through
 * release_buffer() (unless it is a shared buffer owned by the caller)
 * and frees all per-picture side tables allocated by alloc_picture().
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    /* shared pictures do not own their pixel data */
    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);
    av_freep(&pic->mb_type_base);
    av_freep(&pic->dct_coeff);
    av_freep(&pic->pan_scan);
    pic->mb_type= NULL; /* points into mb_type_base, freed above */
    for(i=0; i<2; i++){
        av_freep(&pic->motion_val_base[i]);
        av_freep(&pic->ref_index[i]);
    }

    /* for shared pictures only the pointers are cleared, not freed */
    if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
485

    
486
/**
 * Allocates the per-thread scratch buffers of a (possibly duplicated)
 * context: edge emulation buffer, motion-estimation scratchpads and
 * maps, noise-reduction accumulator and the DCT block array.
 * @return 0 on success, -1 on allocation failure
 */
static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
    int i;

    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;

     //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
    /* rd/b/obmc scratchpads all alias the ME scratchpad */
    s->rd_scratchpad=   s->me.scratchpad;
    s->b_scratchpad=    s->me.scratchpad;
    s->obmc_scratchpad= s->me.scratchpad + 16;
    if (s->encoding) {
        CHECKED_ALLOCZ(s->me.map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me.score_map, ME_MAP_SIZE*sizeof(uint32_t))
        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
        }
    }
    CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
    s->block= s->blocks[0];

    /* pblocks[] gives per-block access into the contiguous block array */
    for(i=0;i<12;i++){
        s->pblocks[i] = (short *)(&s->block[i]);
    }
    return 0;
fail:
    return -1; //free() through MPV_common_end()
}
515

    
516
/**
 * Frees the per-thread scratch buffers allocated by
 * init_duplicate_context() and clears all pointers derived from them.
 */
static void free_duplicate_context(MpegEncContext *s){
    if(!s)
        return;

    av_freep(&s->allocated_edge_emu_buffer);
    s->edge_emu_buffer= NULL;

    av_freep(&s->me.scratchpad);
    /* these three aliased the ME scratchpad */
    s->rd_scratchpad= NULL;
    s->b_scratchpad= NULL;
    s->obmc_scratchpad= NULL;

    av_freep(&s->dct_error_sum);
    av_freep(&s->me.map);
    av_freep(&s->me.score_map);
    av_freep(&s->blocks);
    s->block= NULL;
}
531

    
532
/**
 * Copies into bak the fields of src that are owned per-thread, so they
 * can be restored after the wholesale context memcpy in
 * ff_update_duplicate_context().
 */
static void backup_duplicate_context(MpegEncContext *bak, MpegEncContext *src){
#define COPY(a) bak->a= src->a
    /* scratch buffers and pointers into them */
    COPY(allocated_edge_emu_buffer);
    COPY(edge_emu_buffer);
    COPY(me.scratchpad);
    COPY(rd_scratchpad);
    COPY(b_scratchpad);
    COPY(obmc_scratchpad);
    COPY(me.map);
    COPY(me.score_map);
    COPY(blocks);
    COPY(block);
    /* per-thread slice assignment and state */
    COPY(start_mb_y);
    COPY(end_mb_y);
    COPY(me.map_generation);
    COPY(pb);
    COPY(dct_error_sum);
    COPY(dct_count[0]);
    COPY(dct_count[1]);
#undef COPY
}
553

    
554
/**
 * Synchronizes a per-thread context with the main one: copies the whole
 * struct from src, then restores dst's own per-thread fields (scratch
 * buffers, slice range, ...) and re-derives the pblocks pointers so
 * they point into dst's block array rather than src's.
 */
void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src){
    MpegEncContext bak;
    int i;
    //FIXME copy only needed parts
//START_TIMER
    backup_duplicate_context(&bak, dst);
    memcpy(dst, src, sizeof(MpegEncContext));
    backup_duplicate_context(dst, &bak);
    /* pblocks must reference dst's blocks, not the copied src pointers */
    for(i=0;i<12;i++){
        dst->pblocks[i] = (short *)(&dst->block[i]);
    }
//STOP_TIMER("update_duplicate_context") //about 10k cycles / 0.01 sec for 1000frames on 1ghz with 2 threads
}
567

    
568
/**
 * Propagates the fields decided during/after motion estimation from the
 * main context to a per-thread context (picture type, codes, quantizer,
 * lambda values and frame structure flags).
 */
static void update_duplicate_context_after_me(MpegEncContext *dst, MpegEncContext *src){
#define COPY(a) dst->a= src->a
    COPY(pict_type);
    COPY(current_picture);
    COPY(f_code);
    COPY(b_code);
    COPY(qscale);
    COPY(lambda);
    COPY(lambda2);
    COPY(picture_in_gop_number);
    COPY(gop_picture_number);
    COPY(frame_pred_frame_dct); //FIXME don't set in encode_header
    COPY(progressive_frame); //FIXME don't set in encode_header
    COPY(partitioned_frame); //FIXME don't set in encode_header
#undef COPY
}
584

    
585
/**
586
 * sets the given MpegEncContext to common defaults (same for encoding and decoding).
587
 * the changed fields will not depend upon the prior state of the MpegEncContext.
588
 */
589
static void MPV_common_defaults(MpegEncContext *s){
    /* default scale tables (MPEG-1 DC scaling for both luma and chroma) */
    s->y_dc_scale_table= ff_mpeg1_dc_scale_table;
    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
    s->chroma_qscale_table= ff_default_chroma_qscale_table;

    /* assume progressive full-frame coding until headers say otherwise */
    s->progressive_frame= 1;
    s->progressive_sequence= 1;
    s->picture_structure= PICT_FRAME;

    /* reset picture counters */
    s->coded_picture_number= 0;
    s->picture_number= 0;
    s->input_picture_number= 0;
    s->picture_in_gop_number= 0;

    /* minimal motion vector ranges */
    s->f_code= 1;
    s->b_code= 1;
}
606

    
607
/**
608
 * sets the given MpegEncContext to defaults for decoding.
609
 * the changed fields will not depend upon the prior state of the MpegEncContext.
610
 */
611
void MPV_decode_defaults(MpegEncContext *s){
    /* decoding currently needs no defaults beyond the common ones */
    MPV_common_defaults(s);
}
614

    
615
/**
616
 * sets the given MpegEncContext to defaults for encoding.
617
 * the changed fields will not depend upon the prior state of the MpegEncContext.
618
 */
619

    
620
#ifdef CONFIG_ENCODERS
621
static void MPV_encode_defaults(MpegEncContext *s){
    static int done=0;  // one-time guard for the shared static tables below

    MPV_common_defaults(s);

    if(!done){
        int i;
        done=1;

        /* NOTE(review): this global initialization is unsynchronized;
           presumably it runs before worker threads are started -- confirm */
        default_mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
        memset(default_fcode_tab , 0, sizeof(uint8_t)*(2*MAX_MV+1));

        /* fcode 1 covers motion vectors in [-16, 16) */
        for(i=-16; i<16; i++){
            default_fcode_tab[i + MAX_MV]= 1;
        }
    }
    s->me.mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
}
640
#endif //CONFIG_ENCODERS
641

    
642
/**
643
 * init common structure for both encoder and decoder.
644
 * this assumes that some variables like width/height are already set
645
 */
646
/**
 * Initializes the common encoder/decoder state: derives macroblock
 * geometry from width/height, initializes the DSP/DCT layers, allocates
 * all shared tables (MV tables, dc/ac prediction, skip tables, ...) and
 * sets up one duplicated context per thread.
 * Assumes s->width and s->height are already set.
 * @return 0 on success, -1 on error (everything freed via MPV_common_end)
 */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;

    s->mb_height = (s->height + 15) / 16;

    /* one slice row per thread at minimum */
    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
        av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
        return -1;
    }

    if((s->width || s->height) && avcodec_check_dimensions(s->avctx, s->width, s->height))
        return -1;

    dsputil_init(&s->dsp, s->avctx);
    DCT_common_init(s);

    s->flags= s->avctx->flags;
    s->flags2= s->avctx->flags2;

    /* macroblock (16x16), 8x8-block and 4x4-block geometry; strides have
       one extra column for boundary handling */
    s->mb_width  = (s->width  + 15) / 16;
    s->mb_stride = s->mb_width + 1;
    s->b8_stride = s->mb_width*2 + 1;
    s->b4_stride = s->mb_width*4 + 1;
    mb_array_size= s->mb_height * s->mb_stride;
    mv_table_size= (s->mb_height+2) * s->mb_stride + 1;

    /* set chroma shifts */
    avcodec_get_chroma_sub_sample(s->avctx->pix_fmt,&(s->chroma_x_shift),
                                                    &(s->chroma_y_shift) );

    /* set default edge pos, will be overriden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    /* block_wrap: per-block-index row stride (luma 8x8 grid vs chroma MB grid) */
    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->b8_stride;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_stride;

    y_size = s->b8_stride * (2 * s->mb_height + 1);
    c_size = s->mb_stride * (s->mb_height + 1);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                        + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                        + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);

    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                               + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                               + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    /* maps linear mb index -> position in the (strided) mb arrays */
    CHECKED_ALLOCZ(s->mb_index2xy, (s->mb_num+1)*sizeof(int)) //error ressilience code looks cleaner with this
    for(y=0; y<s->mb_height; y++){
        for(x=0; x<s->mb_width; x++){
            s->mb_index2xy[ x + y*s->mb_width ] = x + y*s->mb_stride;
        }
    }
    s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?

    if (s->encoding) {
        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_forw_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_back_mv_table_base       , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table_base , mv_table_size * 2 * sizeof(int16_t))
        CHECKED_ALLOCZ(s->b_direct_mv_table_base     , mv_table_size * 2 * sizeof(int16_t))
        /* working pointers skip the first (padding) row and column */
        s->p_mv_table           = s->p_mv_table_base            + s->mb_stride + 1;
        s->b_forw_mv_table      = s->b_forw_mv_table_base       + s->mb_stride + 1;
        s->b_back_mv_table      = s->b_back_mv_table_base       + s->mb_stride + 1;
        s->b_bidir_forw_mv_table= s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
        s->b_bidir_back_mv_table= s->b_bidir_back_mv_table_base + s->mb_stride + 1;
        s->b_direct_mv_table    = s->b_direct_mv_table_base     + s->mb_stride + 1;

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);

        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding

        CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))

        /* quantizer multiplier tables, filled later by convert_matrix() */
        CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
        CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
        CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
        CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))

        if(s->avctx->noise_reduction){
            CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
        }
    }
    CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))

    CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))

    if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
        /* interlaced direct mode decoding tables */
            for(i=0; i<2; i++){
                int j, k;
                for(j=0; j<2; j++){
                    for(k=0; k<2; k++){
                        CHECKED_ALLOCZ(s->b_field_mv_table_base[i][j][k]     , mv_table_size * 2 * sizeof(int16_t))
                        s->b_field_mv_table[i][j][k]    = s->b_field_mv_table_base[i][j][k]     + s->mb_stride + 1;
                    }
                    CHECKED_ALLOCZ(s->b_field_select_table[i][j]     , mb_array_size * 2 * sizeof(uint8_t))
                    CHECKED_ALLOCZ(s->p_field_mv_table_base[i][j]     , mv_table_size * 2 * sizeof(int16_t))
                    s->p_field_mv_table[i][j]    = s->p_field_mv_table_base[i][j]     + s->mb_stride + 1;
                }
                CHECKED_ALLOCZ(s->p_field_select_table[i]      , mb_array_size * 2 * sizeof(uint8_t))
            }
    }
    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val_base, yc_size * sizeof(int16_t) * 16);
        s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
        s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block_base, y_size);
        s->coded_block= s->coded_block_base + s->b8_stride + 1;

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
        CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val_base, yc_size * sizeof(int16_t));
        s->dc_val[0] = s->dc_val_base + s->b8_stride + 1;
        s->dc_val[1] = s->dc_val_base + y_size + s->mb_stride + 1;
        s->dc_val[2] = s->dc_val[1] + c_size;
        /* 1024 is the neutral DC predictor value */
        for(i=0;i<yc_size;i++)
            s->dc_val_base[i] = 1024;
    }

    /* which mb is a intra block */
    CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
    memset(s->mbintra_table, 1, mb_array_size);

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
    //Note the +1 is for a quicker mpeg4 slice_end detection
    CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);

    s->parse_context.state= -1;
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
       s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
       s->visualization_buffer[1] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
       s->visualization_buffer[2] = av_malloc((s->mb_width*8 + EDGE_WIDTH) * s->mb_height*8 + EDGE_WIDTH);
    }

    s->context_initialized = 1;

    /* thread 0 uses the main context directly; others get a full copy */
    s->thread_context[0]= s;
    for(i=1; i<s->avctx->thread_count; i++){
        s->thread_context[i]= av_malloc(sizeof(MpegEncContext));
        memcpy(s->thread_context[i], s, sizeof(MpegEncContext));
    }

    for(i=0; i<s->avctx->thread_count; i++){
        if(init_duplicate_context(s->thread_context[i], s) < 0)
           goto fail;
        /* distribute mb rows evenly (rounded) across threads */
        s->thread_context[i]->start_mb_y= (s->mb_height*(i  ) + s->avctx->thread_count/2) / s->avctx->thread_count;
        s->thread_context[i]->end_mb_y  = (s->mb_height*(i+1) + s->avctx->thread_count/2) / s->avctx->thread_count;
    }

    return 0;
 fail:
    MPV_common_end(s);
    return -1;
}
834

    
835
/* init common structure for both encoder and decoder */
836
void MPV_common_end(MpegEncContext *s)
837
{
838
    int i, j, k;
839

    
840
    for(i=0; i<s->avctx->thread_count; i++){
841
        free_duplicate_context(s->thread_context[i]);
842
    }
843
    for(i=1; i<s->avctx->thread_count; i++){
844
        av_freep(&s->thread_context[i]);
845
    }
846

    
847
    av_freep(&s->parse_context.buffer);
848
    s->parse_context.buffer_size=0;
849

    
850
    av_freep(&s->mb_type);
851
    av_freep(&s->p_mv_table_base);
852
    av_freep(&s->b_forw_mv_table_base);
853
    av_freep(&s->b_back_mv_table_base);
854
    av_freep(&s->b_bidir_forw_mv_table_base);
855
    av_freep(&s->b_bidir_back_mv_table_base);
856
    av_freep(&s->b_direct_mv_table_base);
857
    s->p_mv_table= NULL;
858
    s->b_forw_mv_table= NULL;
859
    s->b_back_mv_table= NULL;
860
    s->b_bidir_forw_mv_table= NULL;
861
    s->b_bidir_back_mv_table= NULL;
862
    s->b_direct_mv_table= NULL;
863
    for(i=0; i<2; i++){
864
        for(j=0; j<2; j++){
865
            for(k=0; k<2; k++){
866
                av_freep(&s->b_field_mv_table_base[i][j][k]);
867
                s->b_field_mv_table[i][j][k]=NULL;
868
            }
869
            av_freep(&s->b_field_select_table[i][j]);
870
            av_freep(&s->p_field_mv_table_base[i][j]);
871
            s->p_field_mv_table[i][j]=NULL;
872
        }
873
        av_freep(&s->p_field_select_table[i]);
874
    }
875

    
876
    av_freep(&s->dc_val_base);
877
    av_freep(&s->ac_val_base);
878
    av_freep(&s->coded_block_base);
879
    av_freep(&s->mbintra_table);
880
    av_freep(&s->cbp_table);
881
    av_freep(&s->pred_dir_table);
882

    
883
    av_freep(&s->mbskip_table);
884
    av_freep(&s->prev_pict_types);
885
    av_freep(&s->bitstream_buffer);
886
    s->allocated_bitstream_buffer_size=0;
887

    
888
    av_freep(&s->avctx->stats_out);
889
    av_freep(&s->ac_stats);
890
    av_freep(&s->error_status_table);
891
    av_freep(&s->mb_index2xy);
892
    av_freep(&s->lambda_table);
893
    av_freep(&s->q_intra_matrix);
894
    av_freep(&s->q_inter_matrix);
895
    av_freep(&s->q_intra_matrix16);
896
    av_freep(&s->q_inter_matrix16);
897
    av_freep(&s->input_picture);
898
    av_freep(&s->reordered_input_picture);
899
    av_freep(&s->dct_offset);
900

    
901
    if(s->picture){
902
        for(i=0; i<MAX_PICTURE_COUNT; i++){
903
            free_picture(s, &s->picture[i]);
904
        }
905
    }
906
    av_freep(&s->picture);
907
    s->context_initialized = 0;
908
    s->last_picture_ptr=
909
    s->next_picture_ptr=
910
    s->current_picture_ptr= NULL;
911
    s->linesize= s->uvlinesize= 0;
912

    
913
    for(i=0; i<3; i++)
914
        av_freep(&s->visualization_buffer[i]);
915

    
916
    avcodec_default_free_buffers(s->avctx);
917
}
918

    
919
#ifdef CONFIG_ENCODERS
920

    
921
/* init video encoder */
922
int MPV_encode_init(AVCodecContext *avctx)
923
{
924
    MpegEncContext *s = avctx->priv_data;
925
    int i;
926
    int chroma_h_shift, chroma_v_shift;
927

    
928
    MPV_encode_defaults(s);
929

    
930
    if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
931
        av_log(avctx, AV_LOG_ERROR, "only YUV420 is supported\n");
932
        return -1;
933
    }
934

    
935
    if(avctx->codec_id == CODEC_ID_MJPEG || avctx->codec_id == CODEC_ID_LJPEG){
936
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUVJ420P){
937
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported in jpeg\n");
938
            return -1;
939
        }
940
    }else{
941
        if(avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL && avctx->pix_fmt != PIX_FMT_YUV420P){
942
            av_log(avctx, AV_LOG_ERROR, "colorspace not supported\n");
943
            return -1;
944
        }
945
    }
946

    
947
    s->bit_rate = avctx->bit_rate;
948
    s->width = avctx->width;
949
    s->height = avctx->height;
950
    if(avctx->gop_size > 600){
951
        av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
952
        avctx->gop_size=600;
953
    }
954
    s->gop_size = avctx->gop_size;
955
    s->avctx = avctx;
956
    s->flags= avctx->flags;
957
    s->flags2= avctx->flags2;
958
    s->max_b_frames= avctx->max_b_frames;
959
    s->codec_id= avctx->codec->id;
960
    s->luma_elim_threshold  = avctx->luma_elim_threshold;
961
    s->chroma_elim_threshold= avctx->chroma_elim_threshold;
962
    s->strict_std_compliance= avctx->strict_std_compliance;
963
    s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
964
    s->quarter_sample= (avctx->flags & CODEC_FLAG_QPEL)!=0;
965
    s->mpeg_quant= avctx->mpeg_quant;
966
    s->rtp_mode= !!avctx->rtp_payload_size;
967
    s->intra_dc_precision= avctx->intra_dc_precision;
968
    s->user_specified_pts = AV_NOPTS_VALUE;
969

    
970
    if (s->gop_size <= 1) {
971
        s->intra_only = 1;
972
        s->gop_size = 12;
973
    } else {
974
        s->intra_only = 0;
975
    }
976

    
977
    s->me_method = avctx->me_method;
978

    
979
    /* Fixed QSCALE */
980
    s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
981

    
982
    s->adaptive_quant= (   s->avctx->lumi_masking
983
                        || s->avctx->dark_masking
984
                        || s->avctx->temporal_cplx_masking
985
                        || s->avctx->spatial_cplx_masking
986
                        || s->avctx->p_masking
987
                        || s->avctx->border_masking
988
                        || (s->flags&CODEC_FLAG_QP_RD))
989
                       && !s->fixed_qscale;
990

    
991
    s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
992
    s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
993
    s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
994

    
995
    if(avctx->rc_max_rate && !avctx->rc_buffer_size){
996
        av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
997
        return -1;
998
    }
999

    
1000
    if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
1001
        av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
1002
    }
1003

    
1004
    if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
1005
        av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
1006
        return -1;
1007
    }
1008

    
1009
    if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
1010
        av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
1011
        return -1;
1012
    }
1013

    
1014
    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
1015
       && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
1016
       && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
1017

    
1018
        av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
1019
    }
1020

    
1021
    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
1022
       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
1023
        av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
1024
        return -1;
1025
    }
1026

    
1027
    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
1028
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
1029
        return -1;
1030
    }
1031

    
1032
    if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
1033
        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
1034
        return -1;
1035
    }
1036

    
1037
    if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
1038
        av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
1039
        return -1;
1040
    }
1041

    
1042
    if(s->data_partitioning && s->codec_id != CODEC_ID_MPEG4){
1043
        av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
1044
        return -1;
1045
    }
1046

    
1047
    if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
1048
        av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
1049
        return -1;
1050
    }
1051

    
1052
    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
1053
       && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
1054
        av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
1055
        return -1;
1056
    }
1057

    
1058
    if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
1059
        av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
1060
        return -1;
1061
    }
1062

    
1063
    if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
1064
        av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
1065
        return -1;
1066
    }
1067

    
1068
    if((s->flags & CODEC_FLAG_QP_RD) && s->avctx->mb_decision != FF_MB_DECISION_RD){
1069
        av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
1070
        return -1;
1071
    }
1072

    
1073
    if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
1074
        av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
1075
        return -1;
1076
    }
1077

    
1078
    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
1079
       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
1080
       && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
1081
        av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
1082
        return -1;
1083
    }
1084

    
1085
    if(s->avctx->thread_count > 1)
1086
        s->rtp_mode= 1;
1087

    
1088
    if(!avctx->time_base.den || !avctx->time_base.num){
1089
        av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
1090
        return -1;
1091
    }
1092

    
1093
    i= (INT_MAX/2+128)>>8;
1094
    if(avctx->me_threshold >= i){
1095
        av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
1096
        return -1;
1097
    }
1098
    if(avctx->mb_threshold >= i){
1099
        av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
1100
        return -1;
1101
    }
1102

    
1103
    if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
1104
        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
1105
        return -1;
1106
    }
1107

    
1108
    i= ff_gcd(avctx->time_base.den, avctx->time_base.num);
1109
    if(i > 1){
1110
        av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
1111
        avctx->time_base.den /= i;
1112
        avctx->time_base.num /= i;
1113
//        return -1;
1114
    }
1115

    
1116
    if(s->codec_id==CODEC_ID_MJPEG){
1117
        s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
1118
        s->inter_quant_bias= 0;
1119
    }else if(s->mpeg_quant || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO){
1120
        s->intra_quant_bias= 3<<(QUANT_BIAS_SHIFT-3); //(a + x*3/8)/x
1121
        s->inter_quant_bias= 0;
1122
    }else{
1123
        s->intra_quant_bias=0;
1124
        s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
1125
    }
1126

    
1127
    if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
1128
        s->intra_quant_bias= avctx->intra_quant_bias;
1129
    if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
1130
        s->inter_quant_bias= avctx->inter_quant_bias;
1131

    
1132
    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
1133

    
1134
    if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
1135
        av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
1136
        return -1;
1137
    }
1138
    s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
1139

    
1140
    switch(avctx->codec->id) {
1141
    case CODEC_ID_MPEG1VIDEO:
1142
        s->out_format = FMT_MPEG1;
1143
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1144
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1145
        break;
1146
    case CODEC_ID_MPEG2VIDEO:
1147
        s->out_format = FMT_MPEG1;
1148
        s->low_delay= 0; //s->max_b_frames ? 0 : 1;
1149
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1150
        s->rtp_mode= 1;
1151
        break;
1152
    case CODEC_ID_LJPEG:
1153
    case CODEC_ID_JPEGLS:
1154
    case CODEC_ID_MJPEG:
1155
        s->out_format = FMT_MJPEG;
1156
        s->intra_only = 1; /* force intra only for jpeg */
1157
        s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
1158
        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
1159
        s->mjpeg_vsample[0] = 1<<chroma_v_shift;
1160
        s->mjpeg_vsample[1] = 1;
1161
        s->mjpeg_vsample[2] = 1;
1162
        s->mjpeg_hsample[0] = 1<<chroma_h_shift;
1163
        s->mjpeg_hsample[1] = 1;
1164
        s->mjpeg_hsample[2] = 1;
1165
        if (mjpeg_init(s) < 0)
1166
            return -1;
1167
        avctx->delay=0;
1168
        s->low_delay=1;
1169
        break;
1170
    case CODEC_ID_H261:
1171
        s->out_format = FMT_H261;
1172
        avctx->delay=0;
1173
        s->low_delay=1;
1174
        break;
1175
    case CODEC_ID_H263:
1176
        if (h263_get_picture_format(s->width, s->height) == 7) {
1177
            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
1178
            return -1;
1179
        }
1180
        s->out_format = FMT_H263;
1181
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1182
        avctx->delay=0;
1183
        s->low_delay=1;
1184
        break;
1185
    case CODEC_ID_H263P:
1186
        s->out_format = FMT_H263;
1187
        s->h263_plus = 1;
1188
        /* Fx */
1189
        s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
1190
        s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
1191
        s->modified_quant= s->h263_aic;
1192
        s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
1193
        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
1194
        s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
1195
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1196
        s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
1197

    
1198
        /* /Fx */
1199
        /* These are just to be sure */
1200
        avctx->delay=0;
1201
        s->low_delay=1;
1202
        break;
1203
    case CODEC_ID_FLV1:
1204
        s->out_format = FMT_H263;
1205
        s->h263_flv = 2; /* format = 1; 11-bit codes */
1206
        s->unrestricted_mv = 1;
1207
        s->rtp_mode=0; /* don't allow GOB */
1208
        avctx->delay=0;
1209
        s->low_delay=1;
1210
        break;
1211
    case CODEC_ID_RV10:
1212
        s->out_format = FMT_H263;
1213
        avctx->delay=0;
1214
        s->low_delay=1;
1215
        break;
1216
    case CODEC_ID_RV20:
1217
        s->out_format = FMT_H263;
1218
        avctx->delay=0;
1219
        s->low_delay=1;
1220
        s->modified_quant=1;
1221
        s->h263_aic=1;
1222
        s->h263_plus=1;
1223
        s->loop_filter=1;
1224
        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
1225
        break;
1226
    case CODEC_ID_MPEG4:
1227
        s->out_format = FMT_H263;
1228
        s->h263_pred = 1;
1229
        s->unrestricted_mv = 1;
1230
        s->low_delay= s->max_b_frames ? 0 : 1;
1231
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
1232
        break;
1233
    case CODEC_ID_MSMPEG4V1:
1234
        s->out_format = FMT_H263;
1235
        s->h263_msmpeg4 = 1;
1236
        s->h263_pred = 1;
1237
        s->unrestricted_mv = 1;
1238
        s->msmpeg4_version= 1;
1239
        avctx->delay=0;
1240
        s->low_delay=1;
1241
        break;
1242
    case CODEC_ID_MSMPEG4V2:
1243
        s->out_format = FMT_H263;
1244
        s->h263_msmpeg4 = 1;
1245
        s->h263_pred = 1;
1246
        s->unrestricted_mv = 1;
1247
        s->msmpeg4_version= 2;
1248
        avctx->delay=0;
1249
        s->low_delay=1;
1250
        break;
1251
    case CODEC_ID_MSMPEG4V3:
1252
        s->out_format = FMT_H263;
1253
        s->h263_msmpeg4 = 1;
1254
        s->h263_pred = 1;
1255
        s->unrestricted_mv = 1;
1256
        s->msmpeg4_version= 3;
1257
        s->flipflop_rounding=1;
1258
        avctx->delay=0;
1259
        s->low_delay=1;
1260
        break;
1261
    case CODEC_ID_WMV1:
1262
        s->out_format = FMT_H263;
1263
        s->h263_msmpeg4 = 1;
1264
        s->h263_pred = 1;
1265
        s->unrestricted_mv = 1;
1266
        s->msmpeg4_version= 4;
1267
        s->flipflop_rounding=1;
1268
        avctx->delay=0;
1269
        s->low_delay=1;
1270
        break;
1271
    case CODEC_ID_WMV2:
1272
        s->out_format = FMT_H263;
1273
        s->h263_msmpeg4 = 1;
1274
        s->h263_pred = 1;
1275
        s->unrestricted_mv = 1;
1276
        s->msmpeg4_version= 5;
1277
        s->flipflop_rounding=1;
1278
        avctx->delay=0;
1279
        s->low_delay=1;
1280
        break;
1281
    default:
1282
        return -1;
1283
    }
1284

    
1285
    avctx->has_b_frames= !s->low_delay;
1286

    
1287
    s->encoding = 1;
1288

    
1289
    /* init */
1290
    if (MPV_common_init(s) < 0)
1291
        return -1;
1292

    
1293
    if(s->modified_quant)
1294
        s->chroma_qscale_table= ff_h263_chroma_qscale_table;
1295
    s->progressive_frame=
1296
    s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
1297
    s->quant_precision=5;
1298

    
1299
    ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
1300
    ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
1301

    
1302
#ifdef CONFIG_H261_ENCODER
1303
    if (s->out_format == FMT_H261)
1304
        ff_h261_encode_init(s);
1305
#endif
1306
    if (s->out_format == FMT_H263)
1307
        h263_encode_init(s);
1308
    if(s->msmpeg4_version)
1309
        ff_msmpeg4_encode_init(s);
1310
    if (s->out_format == FMT_MPEG1)
1311
        ff_mpeg1_encode_init(s);
1312

    
1313
    /* init q matrix */
1314
    for(i=0;i<64;i++) {
1315
        int j= s->dsp.idct_permutation[i];
1316
        if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
1317
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
1318
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
1319
        }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1320
            s->intra_matrix[j] =
1321
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1322
        }else
1323
        { /* mpeg1/2 */
1324
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
1325
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
1326
        }
1327
        if(s->avctx->intra_matrix)
1328
            s->intra_matrix[j] = s->avctx->intra_matrix[i];
1329
        if(s->avctx->inter_matrix)
1330
            s->inter_matrix[j] = s->avctx->inter_matrix[i];
1331
    }
1332

    
1333
    /* precompute matrix */
1334
    /* for mjpeg, we do include qscale in the matrix */
1335
    if (s->out_format != FMT_MJPEG) {
1336
        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
1337
                       s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
1338
        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
1339
                       s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
1340
    }
1341

    
1342
    if(ff_rate_control_init(s) < 0)
1343
        return -1;
1344

    
1345
    return 0;
1346
}
1347

    
1348
/**
 * Shuts down the encoder: releases rate-control state, the common context,
 * the MJPEG tables (if used) and the extradata buffer.
 * @return always 0
 */
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

#ifdef STATS
    print_stats();
#endif

    ff_rate_control_uninit(s);

    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    av_freep(&avctx->extradata);

    return 0;
}

#endif //CONFIG_ENCODERS
1368

    
1369
void init_rl(RLTable *rl, int use_static)
1370
{
1371
    int8_t max_level[MAX_RUN+1], max_run[MAX_LEVEL+1];
1372
    uint8_t index_run[MAX_RUN+1];
1373
    int last, run, level, start, end, i;
1374

    
1375
    /* If table is static, we can quit if rl->max_level[0] is not NULL */
1376
    if(use_static && rl->max_level[0])
1377
        return;
1378

    
1379
    /* compute max_level[], max_run[] and index_run[] */
1380
    for(last=0;last<2;last++) {
1381
        if (last == 0) {
1382
            start = 0;
1383
            end = rl->last;
1384
        } else {
1385
            start = rl->last;
1386
            end = rl->n;
1387
        }
1388

    
1389
        memset(max_level, 0, MAX_RUN + 1);
1390
        memset(max_run, 0, MAX_LEVEL + 1);
1391
        memset(index_run, rl->n, MAX_RUN + 1);
1392
        for(i=start;i<end;i++) {
1393
            run = rl->table_run[i];
1394
            level = rl->table_level[i];
1395
            if (index_run[run] == rl->n)
1396
                index_run[run] = i;
1397
            if (level > max_level[run])
1398
                max_level[run] = level;
1399
            if (run > max_run[level])
1400
                max_run[level] = run;
1401
        }
1402
        if(use_static)
1403
            rl->max_level[last] = av_mallocz_static(MAX_RUN + 1);
1404
        else
1405
            rl->max_level[last] = av_malloc(MAX_RUN + 1);
1406
        memcpy(rl->max_level[last], max_level, MAX_RUN + 1);
1407
        if(use_static)
1408
            rl->max_run[last] = av_mallocz_static(MAX_LEVEL + 1);
1409
        else
1410
            rl->max_run[last] = av_malloc(MAX_LEVEL + 1);
1411
        memcpy(rl->max_run[last], max_run, MAX_LEVEL + 1);
1412
        if(use_static)
1413
            rl->index_run[last] = av_mallocz_static(MAX_RUN + 1);
1414
        else
1415
            rl->index_run[last] = av_malloc(MAX_RUN + 1);
1416
        memcpy(rl->index_run[last], index_run, MAX_RUN + 1);
1417
    }
1418
}
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
/**
 * Replicates the border pixels of an image into a surrounding edge band of
 * width 'w' (used so that motion vectors may point outside the picture).
 * @param buf    pointer to the top-left pixel of the image interior; the
 *               caller must guarantee 'w' writable rows/columns around it
 * @param wrap   linesize in bytes
 * @param width  image width in pixels
 * @param height image height in pixels
 * @param w      edge width in pixels
 */
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
{
    uint8_t *ptr, *last_line;
    int i;

    /* replicate the first/last row into the w rows above/below the image */
    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right: replicate the first/last pixel of each row */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners: fill each w x w corner block with the nearest corner pixel */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}

1448

    
1449
int ff_find_unused_picture(MpegEncContext *s, int shared){
1450
    int i;
1451

    
1452
    if(shared){
1453
        for(i=0; i<MAX_PICTURE_COUNT; i++){
1454
            if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
1455
        }
1456
    }else{
1457
        for(i=0; i<MAX_PICTURE_COUNT; i++){
1458
            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) return i; //FIXME
1459
        }
1460
        for(i=0; i<MAX_PICTURE_COUNT; i++){
1461
            if(s->picture[i].data[0]==NULL) return i;
1462
        }
1463
    }
1464

    
1465
    assert(0);
1466
    return -1;
1467
}
1468

    
1469
static void update_noise_reduction(MpegEncContext *s){
1470
    int intra, i;
1471

    
1472
    for(intra=0; intra<2; intra++){
1473
        if(s->dct_count[intra] > (1<<16)){
1474
            for(i=0; i<64; i++){
1475
                s->dct_error_sum[intra][i] >>=1;
1476
            }
1477
            s->dct_count[intra] >>= 1;
1478
        }
1479

    
1480
        for(i=0; i<64; i++){
1481
            s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
1482
        }
1483
    }
1484
}
1485

    
1486
/**
1487
 * generic function for encode/decode called after coding/decoding the header and before a frame is coded/decoded
1488
 */
1489
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
1490
{
1491
    int i;
1492
    AVFrame *pic;
1493
    s->mb_skipped = 0;
1494

    
1495
    assert(s->last_picture_ptr==NULL || s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3);
1496

    
1497
    /* mark&release old frames */
1498
    if (s->pict_type != B_TYPE && s->last_picture_ptr && s->last_picture_ptr != s->next_picture_ptr && s->last_picture_ptr->data[0]) {
1499
        avctx->release_buffer(avctx, (AVFrame*)s->last_picture_ptr);
1500

    
1501
        /* release forgotten pictures */
1502
        /* if(mpeg124/h263) */
1503
        if(!s->encoding){
1504
            for(i=0; i<MAX_PICTURE_COUNT; i++){
1505
                if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
1506
                    av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
1507
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
1508
                }
1509
            }
1510
        }
1511
    }
1512
alloc:
1513
    if(!s->encoding){
1514
        /* release non reference frames */
1515
        for(i=0; i<MAX_PICTURE_COUNT; i++){
1516
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
1517
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
1518
            }
1519
        }
1520

    
1521
        if(s->current_picture_ptr && s->current_picture_ptr->data[0]==NULL)
1522
            pic= (AVFrame*)s->current_picture_ptr; //we allready have a unused image (maybe it was set before reading the header)
1523
        else{
1524
            i= ff_find_unused_picture(s, 0);
1525
            pic= (AVFrame*)&s->picture[i];
1526
        }
1527

    
1528
        pic->reference= (s->pict_type != B_TYPE || s->codec_id == CODEC_ID_H264)
1529
                        && !s->dropable ? 3 : 0;
1530

    
1531
        pic->coded_picture_number= s->coded_picture_number++;
1532

    
1533
        if( alloc_picture(s, (Picture*)pic, 0) < 0)
1534
            return -1;
1535

    
1536
        s->current_picture_ptr= (Picture*)pic;
1537
        s->current_picture_ptr->top_field_first= s->top_field_first; //FIXME use only the vars from current_pic
1538
        s->current_picture_ptr->interlaced_frame= !s->progressive_frame && !s->progressive_sequence;
1539
    }
1540

    
1541
    s->current_picture_ptr->pict_type= s->pict_type;
1542
//    if(s->flags && CODEC_FLAG_QSCALE)
1543
  //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
1544
    s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
1545

    
1546
    copy_picture(&s->current_picture, s->current_picture_ptr);
1547

    
1548
  if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
1549
    if (s->pict_type != B_TYPE) {
1550
        s->last_picture_ptr= s->next_picture_ptr;
1551
        if(!s->dropable)
1552
            s->next_picture_ptr= s->current_picture_ptr;
1553
    }
1554
/*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
1555
        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
1556
        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
1557
        s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
1558
        s->pict_type, s->dropable);*/
1559

    
1560
    if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
1561
    if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
1562

    
1563
    if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
1564
        av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
1565
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
1566
        goto alloc;
1567
    }
1568

    
1569
    assert(s->pict_type == I_TYPE || (s->last_picture_ptr && s->last_picture_ptr->data[0]));
1570

    
1571
    if(s->picture_structure!=PICT_FRAME){
1572
        int i;
1573
        for(i=0; i<4; i++){
1574
            if(s->picture_structure == PICT_BOTTOM_FIELD){
1575
                 s->current_picture.data[i] += s->current_picture.linesize[i];
1576
            }
1577
            s->current_picture.linesize[i] *= 2;
1578
            s->last_picture.linesize[i] *=2;
1579
            s->next_picture.linesize[i] *=2;
1580
        }
1581
    }
1582
  }
1583

    
1584
    s->hurry_up= s->avctx->hurry_up;
1585
    s->error_resilience= avctx->error_resilience;
1586

    
1587
    /* set dequantizer, we can't do it during init as it might change for mpeg4
1588
       and we can't do it in the header decode as init isnt called for mpeg4 there yet */
1589
    if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){
1590
        s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
1591
        s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
1592
    }else if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
1593
        s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
1594
        s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
1595
    }else{
1596
        s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
1597
        s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
1598
    }
1599

    
1600
    if(s->dct_error_sum){
1601
        assert(s->avctx->noise_reduction && s->encoding);
1602

    
1603
        update_noise_reduction(s);
1604
    }
1605

    
1606
#ifdef HAVE_XVMC
1607
    if(s->avctx->xvmc_acceleration)
1608
        return XVMC_field_start(s, avctx);
1609
#endif
1610
    return 0;
1611
}
1612

    
1613
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;
    /* draw edge for correct motion prediction if outside */
#ifdef HAVE_XVMC
//just to make sure that all data is rendered.
    if(s->avctx->xvmc_acceleration){
        XVMC_field_end(s);
    }else
#endif
    /* pad the reference frame's borders so unrestricted MVs can read
       outside the picture area (skipped when the user supplies edges) */
    if(s->unrestricted_mv && s->current_picture.reference && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
    }
    emms_c();

    /* remember type/lambda of this frame for the next coding decisions */
    s->last_pict_type    = s->pict_type;
    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }
#if 0
        /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);
#endif

    if(s->encoding){
        /* release non-reference frames */
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/){
                s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
            }
        }
    }
    // clear copies, to avoid confusion
#if 0
    memset(&s->last_picture, 0, sizeof(Picture));
    memset(&s->next_picture, 0, sizeof(Picture));
    memset(&s->current_picture, 0, sizeof(Picture));
#endif
    /* expose the just-finished picture to the API user */
    s->avctx->coded_frame= (AVFrame*)s->current_picture_ptr;
}
1663

    
1664
/**
 * draws a line from (sx, sy) to (ex, ey), with antialiasing along the
 * minor axis (the fractional coverage is split between two adjacent
 * pixels).
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the line (added to the existing pixel values)
 */
static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int t, x, y, fr, f;

    /* clamp both endpoints into the image so all stores are in bounds */
    sx= clip(sx, 0, w-1);
    sy= clip(sy, 0, h-1);
    ex= clip(ex, 0, w-1);
    ey= clip(ey, 0, h-1);

    buf[sy*stride + sx]+= color;

    if(ABS(ex - sx) > ABS(ey - sy)){
        /* mostly horizontal: iterate over x, interpolate y in 16.16 fixed point */
        if(sx > ex){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ex-= sx;
        f= ((ey-sy)<<16)/ex;   /* ex > 0 here, since |dx| > |dy| >= 0 */
        for(x= 0; x <= ex; x++){
            y = (x*f)>>16;
            fr= (x*f)&0xFFFF;
            buf[ y   *stride + x]+= (color*(0x10000-fr))>>16;
            /* the fr==0 store would add 0 anyway; skipping it also avoids
               touching one row past the image edge when the line ends on
               the last row */
            if(fr) buf[(y+1)*stride + x]+= (color*         fr )>>16;
        }
    }else{
        /* mostly vertical: iterate over y, interpolate x */
        if(sy > ey){
            t=sx; sx=ex; ex=t;
            t=sy; sy=ey; ey=t;
        }
        buf+= sx + sy*stride;
        ey-= sy;
        if(ey) f= ((ex-sx)<<16)/ey;
        else   f= 0;            /* degenerate: single pixel */
        for(y= 0; y <= ey; y++){
            x = (y*f)>>16;
            fr= (y*f)&0xFFFF;
            buf[y*stride + x  ]+= (color*(0x10000-fr))>>16;
            /* as above: avoid the zero-valued store one column past the edge */
            if(fr) buf[y*stride + x+1]+= (color*         fr )>>16;
        }
    }
}
1712

    
1713
/**
 * draws an arrow from (ex, ey) -> (sx, sy); the two head strokes are
 * drawn at the (sx, sy) end.
 * @param w width of the image
 * @param h height of the image
 * @param stride stride/linesize of the image
 * @param color color of the arrow
 */
static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
    int delta_x, delta_y;

    /* keep endpoints within a loose margin around the image */
    sx= clip(sx, -100, w+100);
    sy= clip(sy, -100, h+100);
    ex= clip(ex, -100, w+100);
    ey= clip(ey, -100, h+100);

    delta_x= ex - sx;
    delta_y= ey - sy;

    /* draw the head only if the shaft is longer than 3 pixels */
    if(delta_x*delta_x + delta_y*delta_y > 3*3){
        int head_x=  delta_x + delta_y;   /* shaft direction rotated by 45 degrees */
        int head_y= -delta_x + delta_y;
        int length= ff_sqrt((head_x*head_x + head_y*head_y)<<8);

        //FIXME subpixel accuracy
        head_x= ROUNDED_DIV(head_x*3<<4, length);
        head_y= ROUNDED_DIV(head_y*3<<4, length);

        draw_line(buf, sx, sy, sx + head_x, sy + head_y, w, h, stride, color);
        draw_line(buf, sx, sy, sx - head_y, sy + head_x, w, h, stride, color);
    }
    draw_line(buf, sx, sy, ex, ey, w, h, stride, color);
}
1745

    
1746
/**
 * prints debuging info for the given picture.
 * Depending on avctx->debug / avctx->debug_mv it prints a textual
 * per-macroblock map (skip counts, qscale, mb type) via av_log and/or
 * draws motion vectors, qp and mb-type overlays directly into a copy of
 * the picture planes (s->visualization_buffer).
 */
void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){

    if(!pict || !pict->mb_type) return;

    /* --- textual per-macroblock dump --- */
    if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
        int x,y;

        av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
        switch (pict->pict_type) {
            case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
            case FF_P_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"P\n"); break;
            case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
            case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
            case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
        }
        for(y=0; y<s->mb_height; y++){
            for(x=0; x<s->mb_width; x++){
                if(s->avctx->debug&FF_DEBUG_SKIP){
                    /* consecutive-skip count, capped at one digit */
                    int count= s->mbskip_table[x + y*s->mb_stride];
                    if(count>9) count=9;
                    av_log(s->avctx, AV_LOG_DEBUG, "%1d", count);
                }
                if(s->avctx->debug&FF_DEBUG_QP){
                    av_log(s->avctx, AV_LOG_DEBUG, "%2d", pict->qscale_table[x + y*s->mb_stride]);
                }
                if(s->avctx->debug&FF_DEBUG_MB_TYPE){
                    int mb_type= pict->mb_type[x + y*s->mb_stride];
                    //Type & MV direction
                    if(IS_PCM(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "P");
                    else if(IS_INTRA(mb_type) && IS_ACPRED(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "A");
                    else if(IS_INTRA4x4(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "i");
                    else if(IS_INTRA16x16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "I");
                    else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "d");
                    else if(IS_DIRECT(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "D");
                    else if(IS_GMC(mb_type) && IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "g");
                    else if(IS_GMC(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "G");
                    else if(IS_SKIP(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "S");
                    else if(!USES_LIST(mb_type, 1))
                        av_log(s->avctx, AV_LOG_DEBUG, ">");
                    else if(!USES_LIST(mb_type, 0))
                        av_log(s->avctx, AV_LOG_DEBUG, "<");
                    else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        av_log(s->avctx, AV_LOG_DEBUG, "X");
                    }

                    //segmentation
                    if(IS_8X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "+");
                    else if(IS_16X8(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "-");
                    else if(IS_8X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, "|");
                    else if(IS_INTRA(mb_type) || IS_16X16(mb_type))
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, "?");


                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                        av_log(s->avctx, AV_LOG_DEBUG, "=");
                    else
                        av_log(s->avctx, AV_LOG_DEBUG, " ");
                }
//                av_log(s->avctx, AV_LOG_DEBUG, " ");
            }
            av_log(s->avctx, AV_LOG_DEBUG, "\n");
        }
    }

    /* --- graphical overlays drawn into a private copy of the frame --- */
    if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        const int shift= 1 + s->quarter_sample;   /* MV fractional bits to drop */
        int mb_y;
        uint8_t *ptr;
        int i;
        int h_chroma_shift, v_chroma_shift;
        const int width = s->avctx->width;
        const int height= s->avctx->height;
        const int mv_sample_log2= 4 - pict->motion_subsample_log2;
        const int mv_stride= (s->mb_width << mv_sample_log2) + (s->codec_id == CODEC_ID_H264 ? 0 : 1);
        s->low_delay=0; //needed to see the vectors without trashing the buffers

        /* work on a copy so the decoded reference planes stay untouched */
        avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
        for(i=0; i<3; i++){
            memcpy(s->visualization_buffer[i], pict->data[i], (i==0) ? pict->linesize[i]*height:pict->linesize[i]*height >> v_chroma_shift);
            pict->data[i]= s->visualization_buffer[i];
        }
        pict->type= FF_BUFFER_TYPE_COPY;
        ptr= pict->data[0];

        for(mb_y=0; mb_y<s->mb_height; mb_y++){
            int mb_x;
            for(mb_x=0; mb_x<s->mb_width; mb_x++){
                const int mb_index= mb_x + mb_y*s->mb_stride;
                if((s->avctx->debug_mv) && pict->motion_val){
                  int type;
                  /* type 0: P forward, 1: B forward, 2: B backward */
                  for(type=0; type<3; type++){
                    int direction = 0;
                    switch (type) {
                      case 0: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_P_FOR)) || (pict->pict_type!=FF_P_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 1: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_FOR)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 0;
                              break;
                      case 2: if ((!(s->avctx->debug_mv&FF_DEBUG_VIS_MV_B_BACK)) || (pict->pict_type!=FF_B_TYPE))
                                continue;
                              direction = 1;
                              break;
                    }
                    if(!USES_LIST(pict->mb_type[mb_index], direction))
                        continue;

                    /* one arrow per partition, anchored at the partition center */
                    if(IS_8X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<4; i++){
                        int sx= mb_x*16 + 4 + 8*(i&1);
                        int sy= mb_y*16 + 4 + 8*(i>>1);
                        int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                        int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                        int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                        draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                      }
                    }else if(IS_16X8(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 8;
                        int sy=mb_y*16 + 4 + 8*i;
                        int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;   /* field MV: vertical component is in field units */

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else if(IS_8X16(pict->mb_type[mb_index])){
                      int i;
                      for(i=0; i<2; i++){
                        int sx=mb_x*16 + 4 + 8*i;
                        int sy=mb_y*16 + 8;
                        int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                        int mx=(pict->motion_val[direction][xy][0]>>shift);
                        int my=(pict->motion_val[direction][xy][1]>>shift);

                        if(IS_INTERLACED(pict->mb_type[mb_index]))
                            my*=2;

                        draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                      }
                    }else{
                      int sx= mb_x*16 + 8;
                      int sy= mb_y*16 + 8;
                      int xy= (mb_x + mb_y*mv_stride) << mv_sample_log2;
                      int mx= (pict->motion_val[direction][xy][0]>>shift) + sx;
                      int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                      draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                    }
                  }
                }
                if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                    /* paint both chroma planes with a gray level proportional to qscale */
                    uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
                    int y;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= c;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= c;
                    }
                }
                if((s->avctx->debug&FF_DEBUG_VIS_MB_TYPE) && pict->motion_val){
                    int mb_type= pict->mb_type[mb_index];
                    uint64_t u,v;
                    int y;
/* map a hue angle (degrees) and saturation radius to U/V values */
#define COLOR(theta, r)\
u= (int)(128 + r*cos(theta*3.141592/180));\
v= (int)(128 + r*sin(theta*3.141592/180));


                    u=v=128;   /* default: neutral gray */
                    if(IS_PCM(mb_type)){
                        COLOR(120,48)
                    }else if((IS_INTRA(mb_type) && IS_ACPRED(mb_type)) || IS_INTRA16x16(mb_type)){
                        COLOR(30,48)
                    }else if(IS_INTRA4x4(mb_type)){
                        COLOR(90,48)
                    }else if(IS_DIRECT(mb_type) && IS_SKIP(mb_type)){
//                        COLOR(120,48)
                    }else if(IS_DIRECT(mb_type)){
                        COLOR(150,48)
                    }else if(IS_GMC(mb_type) && IS_SKIP(mb_type)){
                        COLOR(170,48)
                    }else if(IS_GMC(mb_type)){
                        COLOR(190,48)
                    }else if(IS_SKIP(mb_type)){
//                        COLOR(180,48)
                    }else if(!USES_LIST(mb_type, 1)){
                        COLOR(240,48)
                    }else if(!USES_LIST(mb_type, 0)){
                        COLOR(0,48)
                    }else{
                        assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                        COLOR(300,48)
                    }

                    /* replicate the single U/V byte across an 8-byte word */
                    u*= 0x0101010101010101ULL;
                    v*= 0x0101010101010101ULL;
                    for(y=0; y<8; y++){
                        *(uint64_t*)(pict->data[1] + 8*mb_x + (8*mb_y + y)*pict->linesize[1])= u;
                        *(uint64_t*)(pict->data[2] + 8*mb_x + (8*mb_y + y)*pict->linesize[2])= v;
                    }

                    //segmentation
                    if(IS_8X8(mb_type) || IS_16X8(mb_type)){
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 0 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                        *(uint64_t*)(pict->data[0] + 16*mb_x + 8 + (16*mb_y + 8)*pict->linesize[0])^= 0x8080808080808080ULL;
                    }
                    if(IS_8X8(mb_type) || IS_8X16(mb_type)){
                        for(y=0; y<16; y++)
                            pict->data[0][16*mb_x + 8 + (16*mb_y + y)*pict->linesize[0]]^= 0x80;
                    }
                    if(IS_8X8(mb_type) && mv_sample_log2 >= 2){
                        /* mark sub-8x8 splits where neighboring MVs differ */
                        int dm= 1 << (mv_sample_log2-2);
                        for(i=0; i<4; i++){
                            int sx= mb_x*16 + 8*(i&1);
                            int sy= mb_y*16 + 8*(i>>1);
                            int xy= (mb_x*2 + (i&1) + (mb_y*2 + (i>>1))*mv_stride) << (mv_sample_log2-1);
                            //FIXME bidir
                            int32_t *mv = (int32_t*)&pict->motion_val[0][xy];
                            if(mv[0] != mv[dm] || mv[dm*mv_stride] != mv[dm*(mv_stride+1)])
                                for(y=0; y<8; y++)
                                    pict->data[0][sx + 4 + (sy + y)*pict->linesize[0]]^= 0x80;
                            if(mv[0] != mv[dm*mv_stride] || mv[dm] != mv[dm*(mv_stride+1)])
                                *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                        }
                    }

                    if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                        // hmm
                    }
                }
                s->mbskip_table[mb_index]=0;
            }
        }
    }
}
2006

    
2007
#ifdef CONFIG_ENCODERS
2008

    
2009
/**
 * Sum of absolute errors of one 16x16 block against a constant value.
 * @param src top-left of the 16x16 block
 * @param ref value each pixel is compared against
 * @param stride linesize of src
 * @return sum over the block of |src[x,y] - ref|
 */
static int get_sae(uint8_t *src, int ref, int stride){
    int x, y;
    int sum= 0;

    for(y=0; y<16; y++){
        uint8_t *row= src + y*stride;
        for(x=0; x<16; x++)
            sum+= ABS(row[x] - ref);
    }

    return sum;
}
2021

    
2022
/**
 * Counts the 16x16 blocks for which intra coding looks cheaper than inter.
 * A block is counted when its SAE against its own mean (an intra-cost
 * estimate) beats its SAD against the reference by more than 500.
 * @param src current frame data
 * @param ref reference frame data
 * @param stride linesize of both planes
 * @return number of "intra-looking" macroblocks
 */
static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
    int x, y;
    int count= 0;
    const int w= s->width  & ~15;   /* only whole macroblocks */
    const int h= s->height & ~15;

    for(y=0; y<h; y+=16){
        for(x=0; x<w; x+=16){
            const int offset= x + y*stride;
            const int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
            const int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
            const int sae = get_sae(src + offset, mean, stride);

            if(sae + 500 < sad)
                count++;
        }
    }
    return count;
}
2041

    
2042

    
2043
/**
 * Queues one user-supplied frame into s->input_picture[] for encoding.
 * Validates/guesses the pts, then either references the user's buffers
 * directly ("direct" mode, when strides match and input is preserved)
 * or copies the planes into an internal picture.
 * @param pic_arg the frame to queue, or NULL to flush delayed frames
 * @return 0 on success, -1 on invalid (non-monotone) timestamps
 */
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic=NULL;
    int64_t pts;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;   /* assume we can reference the caller's buffers */

    if(pic_arg){
        pts= pic_arg->pts;
        pic_arg->display_picture_number= s->input_picture_number++;

        /* timestamps must be strictly increasing; if absent, extrapolate */
        if(pts != AV_NOPTS_VALUE){
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                int64_t time= pts;
                int64_t last= s->user_specified_pts;

                if(time <= last){
                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                    return -1;
                }
            }
            s->user_specified_pts= pts;
        }else{
            if(s->user_specified_pts != AV_NOPTS_VALUE){
                s->user_specified_pts=
                pts= s->user_specified_pts + 1;
                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
            }else{
                pts= pic_arg->display_picture_number;
            }
        }
    }

  if(pic_arg){
    /* direct mode requires the user to preserve the buffer while we hold it
       and the strides to match ours exactly */
    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= ff_find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        /* just alias the caller's planes, no copy */
        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= ff_find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= 3;

        alloc_picture(s, (Picture*)pic, 0);

        /* skip the copy when the caller already wrote into our buffer
           (offset by INPLACE_OFFSET) */
        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
       // empty
        }else{
            int h_chroma_shift, v_chroma_shift;
            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            /* plane-by-plane copy, honoring chroma subsampling */
            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    copy_picture_attributes(s, pic, pic_arg);
    pic->pts= pts; //we set this here to avoid modifiying pic_arg
  }

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    /* NULL pic_arg leaves a NULL here, draining the queue on flush */
    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
2145

    
2146
/**
 * Decides whether a frame is similar enough to its reference to be skipped.
 * Accumulates per-8x8-block comparison scores over all three planes, using
 * the exponent selected by avctx->frame_skip_exp, and tests the total
 * against the skip threshold and a lambda-scaled factor.
 * @return 1 if the frame should be skipped, 0 otherwise
 */
static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
    int x, y, plane;
    int score32= 0;
    int64_t score64= 0;
    /* shared buffers carry no INPLACE_OFFSET padding in front of the data */
    const int off= p->type == FF_BUFFER_TYPE_SHARED ? 0 : 16;

    for(plane=0; plane<3; plane++){
        const int stride= p->linesize[plane];
        const int bw= plane ? 1 : 2;   /* luma has twice the 8x8 grid density */

        for(y=0; y<s->mb_height*bw; y++){
            for(x=0; x<s->mb_width*bw; x++){
                int diff= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);

                switch(s->avctx->frame_skip_exp){
                    case 0: score32= FFMAX(score32, diff); break;
                    case 1: score32+= ABS(diff); break;
                    case 2: score32+= diff*diff; break;
                    case 3: score64+= ABS(diff*diff*(int64_t)diff); break;
                    case 4: score64+= diff*diff*(int64_t)(diff*diff); break;
                }
            }
        }
    }

    /* exponents 0-2 accumulate in the 32 bit score */
    if(score32) score64= score32;

    if(score64 < s->avctx->frame_skip_threshold)
        return 1;
    if(score64 < ((s->avctx->frame_skip_factor * (int64_t)s->lambda)>>8))
        return 1;
    return 0;
}
2178

    
2179
/**
 * Estimates the best number of B frames to insert before the next P frame
 * by brute force: encodes the queued input pictures at reduced resolution
 * with every candidate B-frame count and picks the one with the lowest
 * rate-distortion cost.
 * @return the best B-frame count, or -1 on allocation/open failure
 */
static int estimate_best_b_count(MpegEncContext *s){
    AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
    AVCodecContext *c= avcodec_alloc_context();
    AVFrame input[FF_MAX_B_FRAMES+2];
    const int scale= s->avctx->brd_scale;
    int i, j, out_size, p_lambda, b_lambda, lambda2;
    int outbuf_size= s->width * s->height; //FIXME
    uint8_t *outbuf= av_malloc(outbuf_size);
    int64_t best_rd= INT64_MAX;
    int best_b_count= -1;

    assert(scale>=0 && scale <=3);

    /* bail out early if any setup allocation failed (also avoids the
       outbuf/context leak the old early return had) */
    if(!codec || !c || !outbuf){
        av_freep(&outbuf);
        av_freep(&c);
        return -1;
    }

//    emms_c();
    p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
    b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
    if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
    lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;

    /* set up a downscaled encoder mirroring the relevant settings */
    c->width = s->width >> scale;
    c->height= s->height>> scale;
    c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
    c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
    c->mb_decision= s->avctx->mb_decision;
    c->me_cmp= s->avctx->me_cmp;
    c->mb_cmp= s->avctx->mb_cmp;
    c->me_sub_cmp= s->avctx->me_sub_cmp;
    c->pix_fmt = PIX_FMT_YUV420P;
    c->time_base= s->avctx->time_base;
    c->max_b_frames= s->max_b_frames;

    if (avcodec_open(c, codec) < 0){
        /* previously leaked outbuf and c on this path */
        av_freep(&outbuf);
        av_freep(&c);
        return -1;
    }

    /* build downscaled copies of the reference + queued input pictures */
    for(i=0; i<s->max_b_frames+2; i++){
        int ysize= c->width*c->height;
        int csize= (c->width/2)*(c->height/2);
        Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;

        avcodec_get_frame_defaults(&input[i]);
        input[i].data[0]= av_malloc(ysize + 2*csize);
        input[i].data[1]= input[i].data[0] + ysize;
        input[i].data[2]= input[i].data[1] + csize;
        input[i].linesize[0]= c->width;
        input[i].linesize[1]=
        input[i].linesize[2]= c->width/2;

        /* guard on pre_input_ptr: the old code read pre_input.type from an
           uninitialized local when pre_input_ptr was NULL (undefined behavior) */
        if(pre_input_ptr && (!i || s->input_picture[i-1])){
            pre_input= *pre_input_ptr;

            if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
                pre_input.data[0]+=INPLACE_OFFSET;
                pre_input.data[1]+=INPLACE_OFFSET;
                pre_input.data[2]+=INPLACE_OFFSET;
            }

            s->dsp.shrink[scale](input[i].data[0], input[i].linesize[0], pre_input.data[0], pre_input.linesize[0], c->width, c->height);
            s->dsp.shrink[scale](input[i].data[1], input[i].linesize[1], pre_input.data[1], pre_input.linesize[1], c->width>>1, c->height>>1);
            s->dsp.shrink[scale](input[i].data[2], input[i].linesize[2], pre_input.data[2], pre_input.linesize[2], c->width>>1, c->height>>1);
        }
    }

    /* try every candidate B-frame run length j */
    for(j=0; j<s->max_b_frames+1; j++){
        int64_t rd=0;

        if(!s->input_picture[j])
            break;

        c->error[0]= c->error[1]= c->error[2]= 0;

        /* the reference is always encoded as a cheap I frame first */
        input[0].pict_type= I_TYPE;
        input[0].quality= 1 * FF_QP2LAMBDA;
        out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
//        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;

        for(i=0; i<s->max_b_frames+1; i++){
            int is_p= i % (j+1) == j || i==s->max_b_frames;

            input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
            input[i+1].quality= is_p ? p_lambda : b_lambda;
            out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        /* get the delayed frames */
        while(out_size){
            out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
        }

        rd += c->error[0] + c->error[1] + c->error[2];

        if(rd < best_rd){
            best_rd= rd;
            best_b_count= j;
        }
    }

    av_freep(&outbuf);
    avcodec_close(c);
    av_freep(&c);

    for(i=0; i<s->max_b_frames+2; i++){
        av_freep(&input[i].data[0]);
    }

    return best_b_count;
}
2288

    
2289
static void select_input_picture(MpegEncContext *s){
2290
    int i;
2291

    
2292
    for(i=1; i<MAX_PICTURE_COUNT; i++)
2293
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
2294
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;
2295

    
2296
    /* set next picture type & ordering */
2297
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
2298
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture_ptr==NULL || s->intra_only){
2299
            s->reordered_input_picture[0]= s->input_picture[0];
2300
            s->reordered_input_picture[0]->pict_type= I_TYPE;
2301
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2302
        }else{
2303
            int b_frames;
2304

    
2305
            if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
2306
                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
2307
                //FIXME check that te gop check above is +-1 correct
2308
//av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
2309

    
2310
                    if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2311
                        for(i=0; i<4; i++)
2312
                            s->input_picture[0]->data[i]= NULL;
2313
                        s->input_picture[0]->type= 0;
2314
                    }else{
2315
                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
2316
                               || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2317

    
2318
                        s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
2319
                    }
2320

    
2321
                    emms_c();
2322
                    ff_vbv_update(s, 0);
2323

    
2324
                    goto no_output_pic;
2325
                }
2326
            }
2327

    
2328
            if(s->flags&CODEC_FLAG_PASS2){
2329
                for(i=0; i<s->max_b_frames+1; i++){
2330
                    int pict_num= s->input_picture[0]->display_picture_number + i;
2331

    
2332
                    if(pict_num >= s->rc_context.num_entries)
2333
                        break;
2334
                    if(!s->input_picture[i]){
2335
                        s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
2336
                        break;
2337
                    }
2338

    
2339
                    s->input_picture[i]->pict_type=
2340
                        s->rc_context.entry[pict_num].new_pict_type;
2341
                }
2342
            }
2343

    
2344
            if(s->avctx->b_frame_strategy==0){
2345
                b_frames= s->max_b_frames;
2346
                while(b_frames && !s->input_picture[b_frames]) b_frames--;
2347
            }else if(s->avctx->b_frame_strategy==1){
2348
                for(i=1; i<s->max_b_frames+1; i++){
2349
                    if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
2350
                        s->input_picture[i]->b_frame_score=
2351
                            get_intra_count(s, s->input_picture[i  ]->data[0],
2352
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
2353
                    }
2354
                }
2355
                for(i=0; i<s->max_b_frames+1; i++){
2356
                    if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
2357
                }
2358

    
2359
                b_frames= FFMAX(0, i-1);
2360

    
2361
                /* reset scores */
2362
                for(i=0; i<b_frames+1; i++){
2363
                    s->input_picture[i]->b_frame_score=0;
2364
                }
2365
            }else if(s->avctx->b_frame_strategy==2){
2366
                b_frames= estimate_best_b_count(s);
2367
            }else{
2368
                av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
2369
                b_frames=0;
2370
            }
2371

    
2372
            emms_c();
2373
//static int b_count=0;
2374
//b_count+= b_frames;
2375
//av_log(s->avctx, AV_LOG_DEBUG, "b_frames: %d\n", b_count);
2376

    
2377
            for(i= b_frames - 1; i>=0; i--){
2378
                int type= s->input_picture[i]->pict_type;
2379
                if(type && type != B_TYPE)
2380
                    b_frames= i;
2381
            }
2382
            if(s->input_picture[b_frames]->pict_type == B_TYPE && b_frames == s->max_b_frames){
2383
                av_log(s->avctx, AV_LOG_ERROR, "warning, too many b frames in a row\n");
2384
            }
2385

    
2386
            if(s->picture_in_gop_number + b_frames >= s->gop_size){
2387
              if((s->flags2 & CODEC_FLAG2_STRICT_GOP) && s->gop_size > s->picture_in_gop_number){
2388
                    b_frames= s->gop_size - s->picture_in_gop_number - 1;
2389
              }else{
2390
                if(s->flags & CODEC_FLAG_CLOSED_GOP)
2391
                    b_frames=0;
2392
                s->input_picture[b_frames]->pict_type= I_TYPE;
2393
              }
2394
            }
2395

    
2396
            if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
2397
               && b_frames
2398
               && s->input_picture[b_frames]->pict_type== I_TYPE)
2399
                b_frames--;
2400

    
2401
            s->reordered_input_picture[0]= s->input_picture[b_frames];
2402
            if(s->reordered_input_picture[0]->pict_type != I_TYPE)
2403
                s->reordered_input_picture[0]->pict_type= P_TYPE;
2404
            s->reordered_input_picture[0]->coded_picture_number= s->coded_picture_number++;
2405
            for(i=0; i<b_frames; i++){
2406
                s->reordered_input_picture[i+1]= s->input_picture[i];
2407
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
2408
                s->reordered_input_picture[i+1]->coded_picture_number= s->coded_picture_number++;
2409
            }
2410
        }
2411
    }
2412
no_output_pic:
2413
    if(s->reordered_input_picture[0]){
2414
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE ? 3 : 0;
2415

    
2416
        copy_picture(&s->new_picture, s->reordered_input_picture[0]);
2417

    
2418
        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
2419
            // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
2420

    
2421
            int i= ff_find_unused_picture(s, 0);
2422
            Picture *pic= &s->picture[i];
2423

    
2424
            /* mark us unused / free shared pic */
2425
            for(i=0; i<4; i++)
2426
                s->reordered_input_picture[0]->data[i]= NULL;
2427
            s->reordered_input_picture[0]->type= 0;
2428

    
2429
            pic->reference              = s->reordered_input_picture[0]->reference;
2430

    
2431
            alloc_picture(s, pic, 0);
2432

    
2433
            copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
2434

    
2435
            s->current_picture_ptr= pic;
2436
        }else{
2437
            // input is not a shared pix -> reuse buffer for current_pix
2438

    
2439
            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
2440
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
2441

    
2442
            s->current_picture_ptr= s->reordered_input_picture[0];
2443
            for(i=0; i<4; i++){
2444
                s->new_picture.data[i]+= INPLACE_OFFSET;
2445
            }
2446
        }
2447
        copy_picture(&s->current_picture, s->current_picture_ptr);
2448

    
2449
        s->picture_number= s->new_picture.display_picture_number;
2450
//printf("dpn:%d\n", s->picture_number);
2451
    }else{
2452
       memset(&s->new_picture, 0, sizeof(Picture));
2453
    }
2454
}
2455

    
2456
/**
 * Top-level encoder entry point: encode one input frame into buf.
 *
 * Splits the output buffer between slice threads, queues/reorders the input
 * picture, encodes it, performs VBV bit stuffing and (for CBR MPEG-1/2)
 * patches the vbv_delay field in the already-written header.
 *
 * @param avctx    codec context (priv_data is the MpegEncContext)
 * @param buf      output bitstream buffer
 * @param buf_size size of buf in bytes
 * @param data     input AVFrame (may be NULL to flush delayed B frames)
 * @return number of bytes written to buf, or -1 on error
 */
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i, stuffing_count;

    if(avctx->pix_fmt != PIX_FMT_YUV420P && avctx->pix_fmt != PIX_FMT_YUVJ420P){
        av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
        return -1;
    }

    /* give each slice thread a proportional share of the output buffer,
       split by its macroblock-row range */
    for(i=0; i<avctx->thread_count; i++){
        int start_y= s->thread_context[i]->start_mb_y;
        int   end_y= s->thread_context[i]->  end_mb_y;
        int h= s->mb_height;
        uint8_t *start= buf + (size_t)(((int64_t) buf_size)*start_y/h);
        uint8_t *end  = buf + (size_t)(((int64_t) buf_size)*  end_y/h);

        init_put_bits(&s->thread_context[i]->pb, start, end - start);
    }

    s->picture_in_gop_number++;

    if(load_input_picture(s, pic_arg) < 0)
        return -1;

    /* pick the picture to code now (handles B-frame reordering/delay) */
    select_input_picture(s);

    /* output? (new_picture.data[0] is NULL while B frames are still being buffered) */
    if(s->new_picture.data[0]){
        s->pict_type= s->new_picture.pict_type;
//emms_c();
//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        /* export per-frame statistics to the public codec context */
        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);

        for(i=0; i<4; i++){
            s->current_picture_ptr->error[i]= s->current_picture.error[i];
            avctx->error[i] += s->current_picture_ptr->error[i];
        }

        if(s->flags&CODEC_FLAG_PASS1)
            assert(avctx->header_bits + avctx->mv_bits + avctx->misc_bits + avctx->i_tex_bits + avctx->p_tex_bits == put_bits_count(&s->pb));
        flush_put_bits(&s->pb);
        s->frame_bits  = put_bits_count(&s->pb);

        /* VBV model may demand stuffing bytes to avoid buffer underflow */
        stuffing_count= ff_vbv_update(s, s->frame_bits);
        if(stuffing_count){
            if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < stuffing_count + 50){
                av_log(s->avctx, AV_LOG_ERROR, "stuffing too large\n");
                return -1;
            }

            switch(s->codec_id){
            case CODEC_ID_MPEG1VIDEO:
            case CODEC_ID_MPEG2VIDEO:
                /* MPEG-1/2: plain zero bytes are legal stuffing */
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0);
                }
            break;
            case CODEC_ID_MPEG4:
                /* MPEG-4: stuffing is written as a dedicated start code (0x1C3)
                   followed by 0xFF bytes, hence the 4-byte adjustment */
                put_bits(&s->pb, 16, 0);
                put_bits(&s->pb, 16, 0x1C3);
                stuffing_count -= 4;
                while(stuffing_count--){
                    put_bits(&s->pb, 8, 0xFF);
                }
            break;
            default:
                av_log(s->avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
            }
            flush_put_bits(&s->pb);
            s->frame_bits  = put_bits_count(&s->pb);
        }

        /* update mpeg1/2 vbv_delay for CBR */
        if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
           && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
            int vbv_delay;

            assert(s->repeat_first_field==0);

            /* vbv_delay is in 90 kHz clock ticks */
            vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
            assert(vbv_delay < 0xFFFF);

            /* patch the 16-bit vbv_delay field in place; it straddles three
               bytes at s->vbv_delay_ptr in the already-written picture header */
            s->vbv_delay_ptr[0] &= 0xF8;
            s->vbv_delay_ptr[0] |= vbv_delay>>13;
            s->vbv_delay_ptr[1]  = vbv_delay>>5;
            s->vbv_delay_ptr[2] &= 0x07;
            s->vbv_delay_ptr[2] |= vbv_delay<<3;
        }
        s->total_bits += s->frame_bits;
        avctx->frame_bits  = s->frame_bits;
    }else{
        /* nothing coded this call: the bit writer must not have been touched */
        assert((pbBufPtr(&s->pb) == s->pb.buf));
        s->frame_bits=0;
    }
    assert((s->frame_bits&7)==0);

    return s->frame_bits/8;
}
2577

    
2578
#endif //CONFIG_ENCODERS
2579

    
2580
/**
 * Global motion compensation, one-motion-vector case ("gmc1").
 * Warps luma and (unless CODEC_FLAG_GRAY) both chroma planes of the
 * reference picture into dest using s->sprite_offset, falling back to
 * ordinary (no-)rounding half-pel copies when no sub-pel phase is needed.
 * Uses the edge-emulation buffer when the read area may leave the picture.
 */
static inline void gmc1_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    uint8_t *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    /* luma: integer source position from the sprite offset, then rescale the
       vector so its low bits become the 1/16-pel interpolation phase */
    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    /* clamp to one block outside the picture; at the clamp limit the
       sub-pel phase is dropped so no sample past the edge is read */
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        /* 17x17 source area needed for 16x16 interpolation */
        if(   (unsigned)src_x >= s->h_edge_pos - 17
           || (unsigned)src_y >= s->v_edge_pos - 17){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
        }
    }

    if((motion_x|motion_y)&7){
        /* true 1/16-pel phase -> dedicated gmc1 interpolator, two 8x16 halves */
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        /* phase is a multiple of 8 (= half-pel): plain hpel copy is enough */
        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    /* chroma: same scheme at half resolution, offsets from sprite_offset[1] */
    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x >= (s->h_edge_pos>>1) - 9
           || (unsigned)src_y >= (s->v_edge_pos>>1) - 9){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;  /* remember: Cr needs the same emulation below */
        }
    }
    s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    return;
}
2665

    
2666
/**
 * Global motion compensation, general (affine warp) case.
 * Runs the dsp.gmc warp over the luma block (as two 8x16 halves) and,
 * unless CODEC_FLAG_GRAY is set, over both 8x8 chroma blocks.
 */
static inline void gmc_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               uint8_t **ref_picture)
{
    const int accuracy = s->sprite_warping_accuracy;
    const int rounder  = (1 << (2*accuracy + 1)) - s->no_rounding;
    const int y_stride = s->linesize;
    const int c_stride = s->uvlinesize;
    int base_x, base_y;
    uint8_t *src;

    /* luma: warp origin for this macroblock */
    src = ref_picture[0];
    base_x = s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
    base_y = s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;

    s->dsp.gmc(dest_y, src, y_stride, 16,
           base_x,
           base_y,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           accuracy + 1, rounder,
           s->h_edge_pos, s->v_edge_pos);
    /* right 8x16 half: advance the warp origin 8 samples along x */
    s->dsp.gmc(dest_y + 8, src, y_stride, 16,
           base_x + s->sprite_delta[0][0]*8,
           base_y + s->sprite_delta[1][0]*8,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           accuracy + 1, rounder,
           s->h_edge_pos, s->v_edge_pos);

    if (s->flags & CODEC_FLAG_GRAY)
        return;

    /* chroma: same warp at half resolution, offsets from sprite_offset[1] */
    base_x = s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
    base_y = s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;

    s->dsp.gmc(dest_cb, ref_picture[1], c_stride, 8,
           base_x,
           base_y,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           accuracy + 1, rounder,
           s->h_edge_pos >> 1, s->v_edge_pos >> 1);

    s->dsp.gmc(dest_cr, ref_picture[2], c_stride, 8,
           base_x,
           base_y,
           s->sprite_delta[0][0], s->sprite_delta[0][1],
           s->sprite_delta[1][0], s->sprite_delta[1][1],
           accuracy + 1, rounder,
           s->h_edge_pos >> 1, s->v_edge_pos >> 1);
}
2721

    
2722
/**
 * Copies a rectangular area of samples to a temporary buffer and replicates
 * the border samples into the parts of the block that lie outside the picture.
 * @param buf destination buffer
 * @param src source buffer, already pointing at the block position (src_x, src_y)
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int copy_x0, copy_y0, copy_x1, copy_y1;

    /* If the block lies entirely outside the picture, move the source
       position so that exactly one row/column still overlaps; the rest is
       generated by replication below. */
    if (src_y >= h) {
        src   += (h - 1 - src_y) * linesize;
        src_y  = h - 1;
    } else if (src_y <= -block_h) {
        src   += (1 - block_h - src_y) * linesize;
        src_y  = 1 - block_h;
    }
    if (src_x >= w) {
        src   += w - 1 - src_x;
        src_x  = w - 1;
    } else if (src_x <= -block_w) {
        src   += 1 - block_w - src_x;
        src_x  = 1 - block_w;
    }

    /* sub-rectangle of the block covered by real picture data */
    copy_x0 = -src_x > 0 ? -src_x : 0;
    copy_y0 = -src_y > 0 ? -src_y : 0;
    copy_x1 = w - src_x < block_w ? w - src_x : block_w;
    copy_y1 = h - src_y < block_h ? h - src_y : block_h;

    /* copy the available samples */
    for (y = copy_y0; y < copy_y1; y++)
        for (x = copy_x0; x < copy_x1; x++)
            buf[x + y*linesize] = src[x + y*linesize];

    /* replicate the first available row upwards */
    for (y = 0; y < copy_y0; y++)
        for (x = copy_x0; x < copy_x1; x++)
            buf[x + y*linesize] = buf[x + copy_y0*linesize];

    /* replicate the last available row downwards */
    for (y = copy_y1; y < block_h; y++)
        for (x = copy_x0; x < copy_x1; x++)
            buf[x + y*linesize] = buf[x + (copy_y1 - 1)*linesize];

    /* replicate the outermost columns sideways; this also fills the corners
       since the top/bottom rows were completed above */
    for (y = 0; y < block_h; y++) {
        for (x = 0; x < copy_x0; x++)
            buf[x + y*linesize] = buf[copy_x0 + y*linesize];
        for (x = copy_x1; x < block_w; x++)
            buf[x + y*linesize] = buf[copy_x1 - 1 + y*linesize];
    }
}
2792

    
2793
/**
 * Half-pel motion compensation for one w x h block.
 * The low bit of each motion component selects the half-pel interpolation;
 * out-of-picture reads go through the edge-emulation buffer.
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, op_pixels_func *pix_op,
                                  int motion_x, int motion_y)
{
    int used_edge_emu = 0;
    int subpel = (motion_x & 1) | ((motion_y & 1) << 1);

    src_x += motion_x >> 1;
    src_y += motion_y >> 1;

    /* WARNING: do not forget half pels — clamp to one block outside the
       picture and drop the half-pel phase at the clamp limit */
    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
    if (src_x == width)
        subpel &= ~1;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        subpel &= ~2;

    src += src_y * stride + src_x;

    if ((s->flags & CODEC_FLAG_EMU_EDGE) && s->unrestricted_mv) {
        if (   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
            || (unsigned)src_y > v_edge_pos - (motion_y&1) - h) {
            ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                                src_x, src_y<<field_based, h_edge_pos, s->v_edge_pos);
            src = s->edge_emu_buffer;
            used_edge_emu = 1;
        }
    }

    if (field_select)
        src += s->linesize;
    pix_op[subpel](dest, src, stride, h);
    return used_edge_emu;
}
2832

    
2833
/**
 * Low-resolution variant of hpel_motion: the sub-pel phase is kept in
 * (2<<lowres)-pel units and handed to the h264 chroma interpolator.
 * @return 1 if the edge-emulation buffer was used, 0 otherwise
 */
static inline int hpel_motion_lowres(MpegEncContext *s,
                                  uint8_t *dest, uint8_t *src,
                                  int field_based, int field_select,
                                  int src_x, int src_y,
                                  int width, int height, int stride,
                                  int h_edge_pos, int v_edge_pos,
                                  int w, int h, h264_chroma_mc_func *pix_op,
                                  int motion_x, int motion_y)
{
    const int lowres   = s->avctx->lowres;
    const int sub_mask = (2 << lowres) - 1;
    int used_edge_emu  = 0;
    int frac_x, frac_y;

    /* quarter-pel vectors carry twice the resolution; fold them down */
    if (s->quarter_sample) {
        motion_x /= 2;
        motion_y /= 2;
    }

    frac_x = motion_x & sub_mask;
    frac_y = motion_y & sub_mask;
    src_x += motion_x >> (lowres + 1);
    src_y += motion_y >> (lowres + 1);

    src += src_y * stride + src_x;

    if (   (unsigned)src_x >  h_edge_pos                 - (!!frac_x) - w
        || (unsigned)src_y > (v_edge_pos >> field_based) - (!!frac_y) - h) {
        ff_emulated_edge_mc(s->edge_emu_buffer, src, s->linesize, w+1, (h+1)<<field_based,
                            src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
        src = s->edge_emu_buffer;
        used_edge_emu = 1;
    }

    /* rescale the phase to the 1/8-pel units the interpolator expects */
    frac_x <<= 2 - lowres;
    frac_y <<= 2 - lowres;
    if (field_select)
        src += s->linesize;
    pix_op[lowres](dest, src, stride, h, frac_x, frac_y);
    return used_edge_emu;
}
2874

    
2875
/**
 * Apply one mpeg motion vector to the three components (luma + both chroma
 * planes) of one macroblock.
 * Chroma positions and sub-pel phases are derived per output format
 * (H.263, H.261, and the MPEG 420/422/444 cases).
 * @param field_based   1 when compensating one field (doubles the line stride)
 * @param bottom_field  write to the bottom field of dest
 * @param field_select  read from the bottom field of the reference
 */
static always_inline void mpeg_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;

#if 0
if(s->quarter_sample)
{
    motion_x>>=1;
    motion_y>>=1;
}
#endif

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    /* low bits of the vector select the half-pel interpolation filter */
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x* 16               + (motion_x >> 1);
    src_y =(s->mb_y<<(4-field_based)) + (motion_y >> 1);

    if (s->out_format == FMT_H263) {
        if((s->workaround_bugs & FF_BUG_HPEL_CHROMA) && field_based){
            /* buggy-encoder workaround: chroma vector rounded like hpel */
            mx = (motion_x>>1)|(motion_x&1);
            my = motion_y >>1;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        }else{
            uvdxy = dxy | (motion_y & 2) | ((motion_x & 2) >> 1);
            uvsrc_x = src_x>>1;
            uvsrc_y = src_y>>1;
        }
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvdxy = 0;
        uvsrc_x = s->mb_x*8 + mx;
        uvsrc_y = s->mb_y*8 + my;
    } else {
        if(s->chroma_y_shift){
            /* 4:2:0 — chroma subsampled in both directions */
            mx = motion_x / 2;
            my = motion_y / 2;
            uvdxy = ((my & 1) << 1) | (mx & 1);
            uvsrc_x = s->mb_x* 8               + (mx >> 1);
            uvsrc_y = (s->mb_y<<(3-field_based)) + (my >> 1);
        } else {
            if(s->chroma_x_shift){
            //Chroma422
                mx = motion_x / 2;
                uvdxy = ((motion_y & 1) << 1) | (mx & 1);
                uvsrc_x = s->mb_x* 8           + (mx >> 1);
                uvsrc_y = src_y;
            } else {
            //Chroma444
                uvdxy = dxy;
                uvsrc_x = src_x;
                uvsrc_y = src_y;
            }
        }
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* source area may leave the picture: either reject (MPEG-1/2, where
       such vectors are illegal) or synthesize the edge samples */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
            if(s->codec_id == CODEC_ID_MPEG2VIDEO ||
               s->codec_id == CODEC_ID_MPEG1VIDEO){
                av_log(s->avctx,AV_LOG_DEBUG,"MPEG motion vector out of boundary\n");
                return ;
            }
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    pix_op[0][dxy](dest_y, ptr_y, linesize, h);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
        pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
    }
#if defined(CONFIG_H261_ENCODER) || defined(CONFIG_H261_DECODER)
    if(s->out_format == FMT_H261){
        ff_h261_loop_filter(s);
    }
#endif
}
2991

    
2992
/**
 * Apply one mpeg motion vector to the three components — low-resolution
 * variant (all positions/strides scaled by 1<<lowres, interpolation done
 * with the h264 chroma filters).
 * @param field_based   1 when compensating one field (doubles the line stride)
 * @param bottom_field  write to the bottom field of dest
 * @param field_select  read from the bottom field of the reference
 */
static always_inline void mpeg_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, h264_chroma_mc_func *pix_op,
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int mx, my, src_x, src_y, uvsrc_x, uvsrc_y, uvlinesize, linesize, sx, sy, uvsx, uvsy;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;        /* scaled half-macroblock size */
    const int s_mask= (2<<lowres)-1;     /* sub-pel phase mask */
    const int h_edge_pos = s->h_edge_pos >> lowres;
    const int v_edge_pos = s->v_edge_pos >> lowres;
    linesize   = s->current_picture.linesize[0] << field_based;
    uvlinesize = s->current_picture.linesize[1] << field_based;

    if(s->quarter_sample){ //FIXME obviously not perfect but qpel wont work in lowres anyway
        motion_x/=2;
        motion_y/=2;
    }

    if(field_based){
        /* compensate for the field line offset at reduced resolution */
        motion_y += (bottom_field - field_select)*((1<<lowres)-1);
    }

    sx= motion_x & s_mask;
    sy= motion_y & s_mask;
    src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
    src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));

    /* chroma position/phase derivation per output format */
    if (s->out_format == FMT_H263) {
        uvsx = ((motion_x>>1) & s_mask) | (sx&1);
        uvsy = ((motion_y>>1) & s_mask) | (sy&1);
        uvsrc_x = src_x>>1;
        uvsrc_y = src_y>>1;
    }else if(s->out_format == FMT_H261){//even chroma mv's are full pel in H261
        mx = motion_x / 4;
        my = motion_y / 4;
        uvsx = (2*mx) & s_mask;
        uvsy = (2*my) & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> lowres);
        uvsrc_y = s->mb_y*block_s               + (my >> lowres);
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        uvsx = mx & s_mask;
        uvsy = my & s_mask;
        uvsrc_x = s->mb_x*block_s               + (mx >> (lowres+1));
        uvsrc_y =(s->mb_y*block_s>>field_based) + (my >> (lowres+1));
    }

    ptr_y  = ref_picture[0] + src_y * linesize + src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* synthesize edge samples when the read area leaves the picture */
    if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - 2*block_s
       || (unsigned)src_y >(v_edge_pos >> field_based) - (!!sy) - h){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, h_edge_pos, v_edge_pos);
            ptr_y = s->edge_emu_buffer;
            if(!(s->flags&CODEC_FLAG_GRAY)){
                uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                 uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                ptr_cb= uvbuf;
                ptr_cr= uvbuf+16;
            }
    }

    if(bottom_field){ //FIXME use this for field pix too instead of the obnoxious hack which changes picture.data
        dest_y += s->linesize;
        dest_cb+= s->uvlinesize;
        dest_cr+= s->uvlinesize;
    }

    if(field_select){
        ptr_y += s->linesize;
        ptr_cb+= s->uvlinesize;
        ptr_cr+= s->uvlinesize;
    }

    /* rescale the phase to the units the interpolator expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);

    if(!(s->flags&CODEC_FLAG_GRAY)){
        uvsx <<= 2 - lowres;
        uvsy <<= 2 - lowres;
        pix_op[lowres](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
        pix_op[lowres](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift, uvsx, uvsy);
    }
    //FIXME h261 lowres loop filter
}
3088

    
3089
//FIXME move to dsputil, avg variant, 16x16 version
/**
 * Blend five 8x8 predictions into one 8x8 block for overlapped block
 * motion compensation.
 * src[0]=mid, src[1]=top, src[2]=left, src[3]=right, src[4]=bottom.
 * The per-position weights below always sum to 8, so the result is the
 * rounded weighted average: (sum + 4) >> 3.
 *
 * Fix over the previous revision: the function-local OBMC_FILTER /
 * OBMC_FILTER4 helper macros are now #undef'ed at the end so they no
 * longer leak into the rest of the translation unit.
 */
static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
    int x;
    uint8_t * const top   = src[1];
    uint8_t * const left  = src[2];
    uint8_t * const mid   = src[0];
    uint8_t * const right = src[3];
    uint8_t * const bottom= src[4];
/* weighted blend of one sample (weights t,l,m,r,b sum to 8) */
#define OBMC_FILTER(x, t, l, m, r, b)\
    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
/* same weights applied to a 2x2 group of samples */
#define OBMC_FILTER4(x, t, l, m, r, b)\
    OBMC_FILTER(x         , t, l, m, r, b);\
    OBMC_FILTER(x+1       , t, l, m, r, b);\
    OBMC_FILTER(x  +stride, t, l, m, r, b);\
    OBMC_FILTER(x+1+stride, t, l, m, r, b);

    x=0;
    /* row 0 */
    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
    x+= stride;
    /* row 1 (its middle was already filled by the 2x2 groups above) */
    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
    x+= stride;
    /* rows 2-3 */
    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
    x+= 2*stride;
    /* rows 4-5 */
    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
    x+= 2*stride;
    /* row 6 (edges; the 2x2 groups also fill parts of row 7) */
    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
    x+= stride;
    /* row 7 */
    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
#undef OBMC_FILTER
#undef OBMC_FILTER4
}
3140

    
3141
/* obmc for 1 8x8 luma block */
3142
static inline void obmc_motion(MpegEncContext *s,
3143
                               uint8_t *dest, uint8_t *src,
3144
                               int src_x, int src_y,
3145
                               op_pixels_func *pix_op,
3146
                               int16_t mv[5][2]/* mid top left right bottom*/)
3147
#define MID    0
3148
{
3149
    int i;
3150
    uint8_t *ptr[5];
3151

    
3152
    assert(s->quarter_sample==0);
3153

    
3154
    for(i=0; i<5; i++){
3155
        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
3156
            ptr[i]= ptr[MID];
3157
        }else{
3158
            ptr[i]= s->obmc_scratchpad + 8*(i&1) + s->linesize*8*(i>>1);
3159
            hpel_motion(s, ptr[i], src, 0, 0,
3160
                        src_x, src_y,
3161
                        s->width, s->height, s->linesize,
3162
                        s->h_edge_pos, s->v_edge_pos,
3163
                        8, 8, pix_op,
3164
                        mv[i][0], mv[i][1]);
3165
        }
3166
    }
3167

    
3168
    put_obmc(dest, ptr, s->linesize);
3169
}
3170

    
3171
/**
 * Quarter-pel luma motion compensation for one 16x16 (or 16x8 field)
 * block, with half-pel chroma derived from the luma vector.
 * @param field_based nonzero for field prediction (doubles the line stride)
 * @param bottom_field write into the bottom field lines of dest
 * @param field_select read from the bottom field lines of the reference
 * @param motion_x,motion_y luma vector in quarter-pel units
 * @param h height of the luma block in lines
 */
static inline void qpel_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                               int field_based, int bottom_field, int field_select,
                               uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, linesize, uvlinesize;

    /* dxy selects one of the 16 quarter-pel interpolation functions */
    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x *  16                 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    v_edge_pos = s->v_edge_pos >> field_based;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;

    /* Derive the half-resolution chroma vector; the two FF_BUG_QPEL_CHROMA
     * paths reproduce the rounding of specific buggy encoders. */
    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA2){
        static const int rtab[8]= {0,0,1,1,0,0,0,1};
        mx= (motion_x>>1) + rtab[motion_x&7];
        my= (motion_y>>1) + rtab[motion_y&7];
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    /* round quarter-pel chroma down to half-pel, keeping a fractional bit */
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);

    uvdxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    uvsrc_x = s->mb_x *  8                 + mx;
    uvsrc_y = s->mb_y * (8 >> field_based) + my;

    ptr_y  = ref_picture[0] +   src_y *   linesize +   src_x;
    ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
    ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;

    /* If the interpolation window sticks out of the padded picture,
     * synthesize the missing edge pixels into edge_emu_buffer. */
    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
       || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
        ptr_y= s->edge_emu_buffer;
        if(!(s->flags&CODEC_FLAG_GRAY)){
            uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                             uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr_cb= uvbuf;
            ptr_cr= uvbuf + 16;
        }
    }

    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr_y, linesize);
    else{
        if(bottom_field){
            dest_y += s->linesize;
            dest_cb+= s->uvlinesize;
            dest_cr+= s->uvlinesize;
        }

        if(field_select){
            ptr_y  += s->linesize;
            ptr_cb += s->uvlinesize;
            ptr_cr += s->uvlinesize;
        }
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        /* fields use the 8x8 qpel functions, two side by side for 16 wide */
        qpix_op[1][dxy](dest_y  , ptr_y  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr_y+8, linesize);
    }
    if(!(s->flags&CODEC_FLAG_GRAY)){
        pix_op[1][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> 1);
        pix_op[1][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> 1);
    }
}
3257

    
3258
inline int ff_h263_round_chroma(int x){
3259
    if (x >= 0)
3260
        return  (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3261
    else {
3262
        x = -x;
3263
        return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1));
3264
    }
3265
}
3266

    
3267
/**
 * h263 chroma 4mv motion compensation.
 * Builds a single half-pel chroma vector from the sum of the four luma
 * vectors (mx, my) and interpolates both chroma planes.
 */
static inline void chroma_4mv_motion(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     op_pixels_func *pix_op,
                                     int mx, int my){
    int dxy, emu=0, src_x, src_y, offset;
    uint8_t *ptr;

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    /* dxy selects one of the 4 half-pel interpolation functions */
    dxy = ((my & 1) << 1) | (mx & 1);
    mx >>= 1;
    my >>= 1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * 8 + my;
    /* clamp to the padded picture; at the far edge drop the fractional
     * part so the interpolation does not read past the last column/row */
    src_x = clip(src_x, -8, s->width/2);
    if (src_x == s->width/2)
        dxy &= ~1;
    src_y = clip(src_y, -8, s->height/2);
    if (src_y == s->height/2)
        dxy &= ~2;

    offset = (src_y * (s->uvlinesize)) + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);

    /* cr uses the same offset/edge decision computed for cb */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
}
3315

    
3316
/**
 * Lowres variant of chroma 4mv motion compensation: one averaged
 * chroma vector applied to both planes at 8>>lowres block size,
 * using the h264-style bilinear chroma MC functions.
 */
static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
                                     uint8_t *dest_cb, uint8_t *dest_cr,
                                     uint8_t **ref_picture,
                                     h264_chroma_mc_func *pix_op,
                                     int mx, int my){
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;
    const int s_mask= (2<<lowres)-1;              /* mask of sub-pel bits kept */
    const int h_edge_pos = s->h_edge_pos >> (lowres+1);
    const int v_edge_pos = s->v_edge_pos >> (lowres+1);
    int emu=0, src_x, src_y, offset, sx, sy;
    uint8_t *ptr;

    /* quarter-sample vectors are first reduced to half-sample */
    if(s->quarter_sample){
        mx/=2;
        my/=2;
    }

    /* In case of 8X8, we construct a single chroma motion vector
       with a special rounding */
    mx= ff_h263_round_chroma(mx);
    my= ff_h263_round_chroma(my);

    sx= mx & s_mask;
    sy= my & s_mask;
    src_x = s->mb_x*block_s + (mx >> (lowres+1));
    src_y = s->mb_y*block_s + (my >> (lowres+1));

    offset = src_y * s->uvlinesize + src_x;
    ptr = ref_picture[1] + offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(   (unsigned)src_x > h_edge_pos - (!!sx) - block_s
           || (unsigned)src_y > v_edge_pos - (!!sy) - block_s){
            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }
    /* scale the sub-pel phase to the 1/8-pel units the MC function expects */
    sx <<= 2 - lowres;
    sy <<= 2 - lowres;
    pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);

    /* cr reuses cb's offset and edge decision */
    ptr = ref_picture[2] + offset;
    if(emu){
        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
        ptr= s->edge_emu_buffer;
    }
    pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
}
3365

    
3366
static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
    /* fetch pixels for estimated mv 4 macroblocks ahead
     * optimized for 64byte cache lines */
    const int shift = s->quarter_sample ? 2 : 1;
    const int mx = (s->mv[dir][0][0] >> shift) + 16*s->mb_x + 8;
    const int my = (s->mv[dir][0][1] >> shift) + 16*s->mb_y;
    const int luma_off   = mx + (my + (s->mb_x & 3)*4) * s->linesize + 64;
    const int chroma_off = (mx >> 1) + ((my >> 1) + (s->mb_x & 7)) * s->uvlinesize + 64;

    s->dsp.prefetch(pix[0] + luma_off, s->linesize, 4);
    /* pix[2]-pix[1] is passed as the "stride" so the single call touches
     * both chroma planes — assumes cb and cr are allocated back to back. */
    s->dsp.prefetch(pix[1] + chroma_off, pix[2] - pix[1], 2);
}
3377

    
3378
/**
 * motion compensation of a single macroblock
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * @param qpix_op qpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    uint8_t *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    prefetch_motion(s, ref_picture, dir);

    /* OBMC path: blend each 8x8 luma block with its neighbours' vectors.
     * mv_cache is a 4x4 grid of vectors: rows/cols 1..2 hold this MB's
     * four block vectors, row 0 / col 0 / col 3 hold the neighbours
     * (duplicated from inside the MB at picture borders or intra MBs). */
    if(s->obmc && s->pict_type != B_TYPE){
        int16_t mv_cache[4][4][2];
        const int xy= s->mb_x + s->mb_y*s->mb_stride;
        const int mot_stride= s->b8_stride;
        const int mot_xy= mb_x*2 + mb_y*2*mot_stride;

        assert(!s->mb_skipped);

        memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
        /* NOTE(review): row 3 (below the MB) copies the same source row as
         * row 2 — looks like a deliberate bottom-row duplication, verify. */
        memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
        memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);

        /* top neighbours: duplicate from inside at the top border / intra */
        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
        }else{
            memcpy(mv_cache[0][1], s->current_picture.motion_val[0][mot_xy-mot_stride], sizeof(int16_t)*4);
        }

        /* left neighbours (each int32 copy moves one (x,y) int16 pair) */
        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
        }else{
            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1];
            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->current_picture.motion_val[0][mot_xy-1+mot_stride];
        }

        /* right neighbours */
        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
        }else{
            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
        }

        mx = 0;
        my = 0;
        for(i=0;i<4;i++) {
            const int x= (i&1)+1;
            const int y= (i>>1)+1;
            /* mid, top, left, right, bottom — the order obmc_motion expects */
            int16_t mv[5][2]= {
                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
            //FIXME cleanup
            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                        ref_picture[0],
                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                        pix_op[1],
                        mv);

            mx += mv[0][0];
            my += mv[0][1];
        }
        /* chroma uses the sum of the four centre vectors */
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);

        return;
    }

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            /* global motion compensation (sprite warping) */
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr,
                            ref_picture);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else if(s->mspel){
            /* wmv2-style modified halfpel */
            ff_mspel_motion(s, dest_y, dest_cb, dest_cr,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else
        {
            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, 0,
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do no forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                       || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
                            ref_picture[0], 0, 0,
                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos, s->v_edge_pos,
                            8, 8, pix_op[1],
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                for(i=0; i<2; i++){
                    qpel_motion(s, dest_y, dest_cb, dest_cr,
                                1, i, s->field_select[dir][i],
                                ref_picture, pix_op, qpix_op,
                                s->mv[dir][i][0], s->mv[dir][i][1], 8);
                }
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 0, s->field_select[dir][0],
                            ref_picture, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            1, 1, s->field_select[dir][1],
                            ref_picture, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        } else {
            /* field picture predicting from the opposite parity of the
             * current frame (only possible on the second field) */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);

            dest_y += 16*s->linesize;
            dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: put first prediction, then average the second onto it */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                }
                pix_op = s->dsp.avg_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);

                // after put we make avg of the same block
                pix_op=s->dsp.avg_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3628

    
3629
/**
 * motion compensation of a single macroblock, lowres variant
 * @param s context
 * @param dest_y luma destination pointer
 * @param dest_cb chroma cb/u destination pointer
 * @param dest_cr chroma cr/v destination pointer
 * @param dir direction (0->forward, 1->backward)
 * @param ref_picture array[3] of pointers to the 3 planes of the reference picture
 * @param pix_op halfpel motion compensation function (average or put normally)
 * the motion vectors are taken from s->mv and the MV type from s->mv_type
 */
static inline void MPV_motion_lowres(MpegEncContext *s,
                              uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
                              int dir, uint8_t **ref_picture,
                              h264_chroma_mc_func *pix_op)
{
    int mx, my;
    int mb_x, mb_y, i;
    const int lowres= s->avctx->lowres;
    const int block_s= 8>>lowres;   /* 8x8 block edge at reduced resolution */

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                    0, 0, 0,
                    ref_picture, pix_op,
                    s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
            for(i=0;i<4;i++) {
                hpel_motion_lowres(s, dest_y + ((i & 1) + (i >> 1) * s->linesize)*block_s,
                            ref_picture[0], 0, 0,
                            (2*mb_x + (i & 1))*block_s, (2*mb_y + (i >>1))*block_s,
                            s->width, s->height, s->linesize,
                            s->h_edge_pos >> lowres, s->v_edge_pos >> lowres,
                            block_s, block_s, pix_op,
                            s->mv[dir][i][0], s->mv[dir][i][1]);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }

        /* chroma uses the sum of the four luma vectors */
        if(!(s->flags&CODEC_FLAG_GRAY))
            chroma_4mv_motion_lowres(s, dest_cb, dest_cr, ref_picture, pix_op, mx, my);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            /* top field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], block_s);
            /* bottom field */
            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        1, 1, s->field_select[dir][1],
                        ref_picture, pix_op,
                        s->mv[dir][1][0], s->mv[dir][1][1], block_s);
        } else {
            /* field picture predicting from the opposite parity of the
             * current frame (only possible on the second field) */
            if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                ref_picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][0],
                        ref_picture, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
        }
        break;
    case MV_TYPE_16X8:
        for(i=0; i<2; i++){
            uint8_t ** ref2picture;

            if(s->picture_structure == s->field_select[dir][i] + 1 || s->pict_type == B_TYPE || s->first_field){
                ref2picture= ref_picture;
            }else{
                ref2picture= s->current_picture_ptr->data;
            }

            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                        0, 0, s->field_select[dir][i],
                        ref2picture, pix_op,
                        s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);

            dest_y += 2*block_s*s->linesize;
            dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
            dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
        }
        break;
    case MV_TYPE_DMV:
        /* dual prime: put first prediction, then average the second onto it */
        if(s->picture_structure == PICT_FRAME){
            for(i=0; i<2; i++){
                int j;
                for(j=0; j<2; j++){
                    mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                                1, j, j^i,
                                ref_picture, pix_op,
                                s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], block_s);
                }
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;
            }
        }else{
            for(i=0; i<2; i++){
                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                            0, 0, s->picture_structure != i+1,
                            ref_picture, pix_op,
                            s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);

                // after put we make avg of the same block
                pix_op = s->dsp.avg_h264_chroma_pixels_tab;

                //opposite parity is always in the same frame if this is second field
                if(!s->first_field){
                    ref_picture = s->current_picture_ptr->data;
                }
            }
        }
    break;
    default: assert(0);
    }
}
3754

    
3755
/* put block[] to dest[]: dequantize the intra block, then
 * inverse-transform and store (overwrite) the result at dest */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
{
    s->dct_unquantize_intra(s, block, i, qscale);
    s->dsp.idct_put (dest, line_size, block);
}
3762

    
3763
/* add block[] to dest[] */
3764
static inline void add_dct(MpegEncContext *s,
3765
                           DCTELEM *block, int i, uint8_t *dest, int line_size)
3766
{
3767
    if (s->block_last_index[i] >= 0) {
3768
        s->dsp.idct_add (dest, line_size, block);
3769
    }
3770
}
3771

    
3772
static inline void add_dequant_dct(MpegEncContext *s,
3773
                           DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
3774
{
3775
    if (s->block_last_index[i] >= 0) {
3776
        s->dct_unquantize_inter(s, block, i, qscale);
3777

    
3778
        s->dsp.idct_add (dest, line_size, block);
3779
    }
3780
}
3781

    
3782
/**
3783
 * cleans dc, ac, coded_block for the current non intra MB
3784
 */
3785
void ff_clean_intra_table_entries(MpegEncContext *s)
3786
{
3787
    int wrap = s->b8_stride;
3788
    int xy = s->block_index[0];
3789

    
3790
    s->dc_val[0][xy           ] =
3791
    s->dc_val[0][xy + 1       ] =
3792
    s->dc_val[0][xy     + wrap] =
3793
    s->dc_val[0][xy + 1 + wrap] = 1024;
3794
    /* ac pred */
3795
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(int16_t));
3796
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(int16_t));
3797
    if (s->msmpeg4_version>=3) {
3798
        s->coded_block[xy           ] =
3799
        s->coded_block[xy + 1       ] =
3800
        s->coded_block[xy     + wrap] =
3801
        s->coded_block[xy + 1 + wrap] = 0;
3802
    }
3803
    /* chroma */
3804
    wrap = s->mb_stride;
3805
    xy = s->mb_x + s->mb_y * wrap;
3806
    s->dc_val[1][xy] =
3807
    s->dc_val[2][xy] = 1024;
3808
    /* ac pred */
3809
    memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
3810
    memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
3811

    
3812
    s->mbintra_table[xy]= 0;
3813
}
3814

    
3815
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)
 */
static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM block[12][64], int lowres_flag)
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_stride + s->mb_x;
#ifdef HAVE_XVMC
    if(s->avctx->xvmc_acceleration){
        XVMC_decode_mb(s);//xvmc uses pblocks
        return;
    }
#endif

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    if(s->avctx->debug&FF_DEBUG_DCT_COEFF) {
       /* save DCT coefficients */
       /* 6 blocks of 64 coefficients per MB, stored un-permutated */
       int i,j;
       DCTELEM *dct = &s->current_picture.dct_coeff[mb_xy*64*6];
       for(i=0; i<6; i++)
           for(j=0; j<64; j++)
               *dct++ = block[i][s->dsp.idct_permutation[j]];
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            /* h263-style prediction: reset predictors only if the previous
               MB at this position was intra */
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            /* mpeg-style: reset the three DC predictors to the midpoint */
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        uint8_t *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
        const int uvlinesize= s->current_picture.linesize[1];
        const int readable= s->pict_type != B_TYPE || s->encoding || s->avctx->draw_horiz_band || lowres_flag;
        const int block_size= lowres_flag ? 8>>s->avctx->lowres : 8;

        /* avoid copy if macroblock skipped in last frame too */
        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            uint8_t *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            assert(age);

            if (s->mb_skipped) {
                s->mb_skipped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do !  */
                if (*mbskip_ptr >= age && s->current_picture.reference){
                    return;
                }
            } else if(!s->current_picture.reference){
                (*mbskip_ptr) ++; /* increase counter so the age can be compared cleanly */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;
            } else{
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        /* interlaced DCT interleaves the two fields: double the stride and
           start the bottom blocks one line down instead of block_size lines */
        dct_linesize = linesize << s->interlaced_dct;
        dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;

        if(readable){
            dest_y=  s->dest[0];
            dest_cb= s->dest[1];
            dest_cr= s->dest[2];
        }else{
            /* render into a scratch buffer; copied to s->dest at skip_idct below */
            dest_y = s->b_scratchpad;
            dest_cb= s->b_scratchpad+16*linesize;
            dest_cr= s->b_scratchpad+32*linesize;
        }

        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was already done otherwise) */
            if(!s->encoding){
                if(lowres_flag){
                    h264_chroma_mc_func *op_pix = s->dsp.put_h264_chroma_pixels_tab;

                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix);
                        op_pix = s->dsp.avg_h264_chroma_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        /* bidirectional: second pass averages into the forward prediction */
                        MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                    }
                }else{
                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                        op_pix = s->dsp.put_pixels_tab;
                        op_qpix= s->dsp.put_qpel_pixels_tab;
                    }else{
                        op_pix = s->dsp.put_no_rnd_pixels_tab;
                        op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_FORWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                        op_pix = s->dsp.avg_pixels_tab;
                        op_qpix= s->dsp.avg_qpel_pixels_tab;
                    }
                    if (s->mv_dir & MV_DIR_BACKWARD) {
                        MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                    }
                }
            }

            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) goto skip_idct;
            if(s->avctx->skip_idct){
                if(  (s->avctx->skip_idct >= AVDISCARD_NONREF && s->pict_type == B_TYPE)
                   ||(s->avctx->skip_idct >= AVDISCARD_NONKEY && s->pict_type != I_TYPE)
                   || s->avctx->skip_idct >= AVDISCARD_ALL)
                    goto skip_idct;
            }

            /* add dct residue */
            /* codecs whose blocks are not dequantized during parsing need a
               combined dequant+idct+add; the rest take a plain idct+add */
            if(s->encoding || !(   s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                add_dequant_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    add_dequant_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            } else if(s->codec_id != CODEC_ID_WMV2){
                add_dct(s, block[0], 0, dest_y                          , dct_linesize);
                add_dct(s, block[1], 1, dest_y              + block_size, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){//Chroma420
                        add_dct(s, block[4], 4, dest_cb, uvlinesize);
                        add_dct(s, block[5], 5, dest_cr, uvlinesize);
                    }else{
                        //chroma422
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        add_dct(s, block[4], 4, dest_cb, dct_linesize);
                        add_dct(s, block[5], 5, dest_cr, dct_linesize);
                        add_dct(s, block[6], 6, dest_cb+dct_offset, dct_linesize);
                        add_dct(s, block[7], 7, dest_cr+dct_offset, dct_linesize);
                        if(!s->chroma_x_shift){//Chroma444
                            add_dct(s, block[8], 8, dest_cb+8, dct_linesize);
                            add_dct(s, block[9], 9, dest_cr+8, dct_linesize);
                            add_dct(s, block[10], 10, dest_cb+8+dct_offset, dct_linesize);
                            add_dct(s, block[11], 11, dest_cr+8+dct_offset, dct_linesize);
                        }
                    }
                }//fi gray
            }
            else{
                /* WMV2 has its own block-add path */
                ff_wmv2_add_mb(s, block, dest_y, dest_cb, dest_cr);
            }
        } else {
            /* dct only in intra block */
            /* same split as above, but writing instead of adding */
            if(s->encoding || !(s->codec_id==CODEC_ID_MPEG1VIDEO || s->codec_id==CODEC_ID_MPEG2VIDEO)){
                put_dct(s, block[0], 0, dest_y                          , dct_linesize, s->qscale);
                put_dct(s, block[1], 1, dest_y              + block_size, dct_linesize, s->qscale);
                put_dct(s, block[2], 2, dest_y + dct_offset             , dct_linesize, s->qscale);
                put_dct(s, block[3], 3, dest_y + dct_offset + block_size, dct_linesize, s->qscale);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, uvlinesize, s->chroma_qscale);
                    put_dct(s, block[5], 5, dest_cr, uvlinesize, s->chroma_qscale);
                }
            }else{
                s->dsp.idct_put(dest_y                          , dct_linesize, block[0]);
                s->dsp.idct_put(dest_y              + block_size, dct_linesize, block[1]);
                s->dsp.idct_put(dest_y + dct_offset             , dct_linesize, block[2]);
                s->dsp.idct_put(dest_y + dct_offset + block_size, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    if(s->chroma_y_shift){
                        s->dsp.idct_put(dest_cb, uvlinesize, block[4]);
                        s->dsp.idct_put(dest_cr, uvlinesize, block[5]);
                    }else{
                        /* chroma422/444: recompute stride/offset for the taller chroma */
                        dct_linesize = uvlinesize << s->interlaced_dct;
                        dct_offset =(s->interlaced_dct)? uvlinesize : uvlinesize*8;

                        s->dsp.idct_put(dest_cb,              dct_linesize, block[4]);
                        s->dsp.idct_put(dest_cr,              dct_linesize, block[5]);
                        s->dsp.idct_put(dest_cb + dct_offset, dct_linesize, block[6]);
                        s->dsp.idct_put(dest_cr + dct_offset, dct_linesize, block[7]);
                        if(!s->chroma_x_shift){//Chroma444
                            s->dsp.idct_put(dest_cb + 8,              dct_linesize, block[8]);
                            s->dsp.idct_put(dest_cr + 8,              dct_linesize, block[9]);
                            s->dsp.idct_put(dest_cb + 8 + dct_offset, dct_linesize, block[10]);
                            s->dsp.idct_put(dest_cr + 8 + dct_offset, dct_linesize, block[11]);
                        }
                    }
                }//gray
            }
        }
skip_idct:
        if(!readable){
            /* copy the scratch-buffer rendering to the real destination */
            s->dsp.put_pixels_tab[0][0](s->dest[0], dest_y ,   linesize,16);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[1], dest_cb, uvlinesize,16 >> s->chroma_y_shift);
            s->dsp.put_pixels_tab[s->chroma_x_shift][0](s->dest[2], dest_cr, uvlinesize,16 >> s->chroma_y_shift);
        }
    }
}
4048

    
4049
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[12][64]){
    /* Two calls with a compile-time-constant lowres_flag: since the internal
       function is always_inline, the compiler emits two specialized bodies
       and dead-strips the branches the constant disables. */
    if(s->avctx->lowres) MPV_decode_mb_internal(s, block, 1);
    else                  MPV_decode_mb_internal(s, block, 0);
}
4053

    
4054
#ifdef CONFIG_ENCODERS
4055

    
4056
/**
 * Zeroes a block that contains only a few isolated +-1 coefficients,
 * when the summed "cost" of those coefficients stays below the threshold.
 * A negative threshold additionally allows the DC coefficient to be dropped.
 */
static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
    /* cost of a level-1 coefficient as a function of the zero-run before it */
    static const char run_cost[64]=
        {3,2,2,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0};
    DCTELEM *const block = s->block[n];
    const int last_index = s->block_last_index[n];
    int score = 0;
    int zero_run = 0;
    int keep_dc, idx;

    if (threshold < 0) {
        keep_dc   = 0;           /* DC may be eliminated too */
        threshold = -threshold;
    } else {
        keep_dc = 1;
    }

    /* is everything we could set to zero already zero? */
    if (last_index <= keep_dc - 1)
        return;

    for (idx = 0; idx <= last_index; idx++) {
        const int pos   = s->intra_scantable.permutated[idx];
        const int level = ABS(block[pos]);

        if (level == 1) {
            if (keep_dc && idx == 0)
                continue;
            score += run_cost[zero_run];
            zero_run = 0;
        } else if (level > 1) {
            return;              /* any coefficient above 1: keep the block */
        } else {
            zero_run++;
        }
    }

    if (score >= threshold)
        return;

    /* cheap enough to drop: clear all (non-DC) coefficients */
    for (idx = keep_dc; idx <= last_index; idx++)
        block[s->intra_scantable.permutated[idx]] = 0;

    s->block_last_index[n] = block[0] ? 0 : -1;
}
4104

    
4105
/**
 * Clamps quantized coefficients into [s->min_qcoeff, s->max_qcoeff];
 * the intra DC coefficient is never touched. Warns (once per MB) when
 * clipping happened and simple MB decision is in use.
 */
static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
{
    const int upper = s->max_qcoeff;
    const int lower = s->min_qcoeff;
    int clipped = 0;
    int idx = s->mb_intra ? 1 : 0;   /* skip clipping of intra dc */

    while (idx <= last_index) {
        const int pos = s->intra_scantable.permutated[idx];
        int value = block[pos];

        if (value > upper) {
            value = upper;
            clipped++;
        } else if (value < lower) {
            value = lower;
            clipped++;
        }

        block[pos] = value;
        idx++;
    }

    if (clipped && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
        av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", clipped, lower, upper);
}
4135

    
4136
#endif //CONFIG_ENCODERS
4137

    
4138
/**
4139
 *
4140
 * @param h is the normal height, this will be reduced automatically if needed for the last row
4141
 */
4142
void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
4143
    if (s->avctx->draw_horiz_band) {
4144
        AVFrame *src;
4145
        int offset[4];
4146

    
4147
        if(s->picture_structure != PICT_FRAME){
4148
            h <<= 1;
4149
            y <<= 1;
4150
            if(s->first_field  && !(s->avctx->slice_flags&SLICE_FLAG_ALLOW_FIELD)) return;
4151
        }
4152

    
4153
        h= FFMIN(h, s->avctx->height - y);
4154

    
4155
        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
4156
            src= (AVFrame*)s->current_picture_ptr;
4157
        else if(s->last_picture_ptr)
4158
            src= (AVFrame*)s->last_picture_ptr;
4159
        else
4160
            return;
4161

    
4162
        if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
4163
            offset[0]=
4164
            offset[1]=
4165
            offset[2]=
4166
            offset[3]= 0;
4167
        }else{
4168
            offset[0]= y * s->linesize;;
4169
            offset[1]=
4170
            offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
4171
            offset[3]= 0;
4172
        }
4173

    
4174
        emms_c();
4175

    
4176
        s->avctx->draw_horiz_band(s->avctx, src, offset,
4177
                                  y, s->picture_structure, h);
4178
    }
4179
}
4180

    
4181
/* Sets up s->block_index[] (DC/AC prediction indices for the 6 blocks of
   the current MB) and s->dest[] (render targets for the current MB). */
void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
    const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
    const int uvlinesize= s->current_picture.linesize[1];
    const int mb_size= 4 - s->avctx->lowres; // log2 of the MB size at the current lowres level

    /* four luma 8x8 blocks (2x2), addressed in the b8-resolution grid */
    s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
    s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
    s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
    s->block_index[3]= s->b8_stride*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
    /* chroma blocks live after the luma area, at MB resolution */
    s->block_index[4]= s->mb_stride*(s->mb_y + 1)                + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    s->block_index[5]= s->mb_stride*(s->mb_y + s->mb_height + 2) + s->b8_stride*s->mb_height*2 + s->mb_x - 1;
    //block_index is not used by mpeg2, so it is not affected by chroma_format

    /* horizontal position of the MB in each plane (mb_x - 1: callers
       advance the pointers before rendering the first MB of a row) */
    s->dest[0] = s->current_picture.data[0] + ((s->mb_x - 1) << mb_size);
    s->dest[1] = s->current_picture.data[1] + ((s->mb_x - 1) << (mb_size - s->chroma_x_shift));
    s->dest[2] = s->current_picture.data[2] + ((s->mb_x - 1) << (mb_size - s->chroma_y_shift));

    if(!(s->pict_type==B_TYPE && s->avctx->draw_horiz_band && s->picture_structure==PICT_FRAME))
    {
        s->dest[0] += s->mb_y *   linesize << mb_size;
        s->dest[1] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
        s->dest[2] += s->mb_y * uvlinesize << (mb_size - s->chroma_y_shift);
    }
}
4205

    
4206
#ifdef CONFIG_ENCODERS
4207

    
4208
/* Computes a per-pixel masking weight for an 8x8 block: a variance-derived
   measure (scaled by 36) over each pixel's 3x3 neighborhood, clipped at the
   block border. Flatter neighborhoods get lower weights. */
static void get_vissual_weight(int16_t *weight, uint8_t *ptr, int stride){
    int px, py;
//FIXME optimize
    for (py = 0; py < 8; py++) {
        for (px = 0; px < 8; px++) {
            const int y0 = FFMAX(py - 1, 0);
            const int y1 = FFMIN(8, py + 2);
            const int x0 = FFMAX(px - 1, 0);
            const int x1 = FFMIN(8, px + 2);
            int sum = 0, sqr = 0, count = 0;
            int nx, ny;

            for (ny = y0; ny < y1; ny++) {
                for (nx = x0; nx < x1; nx++) {
                    const int v = ptr[nx + ny * stride];
                    sum += v;
                    sqr += v * v;
                    count++;
                }
            }
            /* count*sqr - sum*sum == count^2 * variance of the neighborhood */
            weight[px + 8 * py] = (36 * ff_sqrt(count * sqr - sum * sum)) / count;
        }
    }
}
4230

    
4231
/**
 * Encodes one macroblock: adaptive-quant update, (motion-compensated)
 * pixel fetch, optional interlaced-DCT decision, DCT + quantization,
 * coefficient elimination, and finally codec-specific entropy coding.
 * motion_x/motion_y are the MV passed through to the per-codec encoder.
 */
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    int16_t weight[6][64];
    DCTELEM orig[6][64];
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset   = s->linesize*8; //default for progressive frames
    uint8_t *ptr_y, *ptr_cb, *ptr_cr;
    int wrap_y, wrap_c;

    for(i=0; i<6; i++) skip_dct[i]=0;

    if(s->adaptive_quant){
        const int last_qp= s->qscale;
        const int mb_xy= mb_x + mb_y*s->mb_stride;

        s->lambda= s->lambda_table[mb_xy];
        update_qscale(s);

        if(!(s->flags&CODEC_FLAG_QP_RD)){
            s->dquant= s->qscale - last_qp;

            if(s->out_format==FMT_H263){
                /* h263-family limits the per-MB qscale change to +-2 */
                s->dquant= clip(s->dquant, -2, 2); //FIXME RD

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(!s->mb_intra){
                        if(s->pict_type == B_TYPE){
                            /* mpeg4 B frames: dquant must be even, and
                               direct-mode MBs cannot change the quantizer */
                            if(s->dquant&1)
                                s->dquant= (s->dquant/2)*2;
                            if(s->mv_dir&MV_DIRECT)
                                s->dquant= 0;
                        }
                        /* 4MV MBs cannot carry a dquant either */
                        if(s->mv_type==MV_TYPE_8X8)
                            s->dquant=0;
                    }
                }
            }
        }
        ff_set_qscale(s, last_qp + s->dquant);
    }else if(s->flags&CODEC_FLAG_QP_RD)
        ff_set_qscale(s, s->qscale + s->dquant);

    /* source pixels of this MB in the frame being encoded */
    wrap_y = s->linesize;
    wrap_c = s->uvlinesize;
    ptr_y = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
    ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
    ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

    /* MB sticks out of the picture: read via the edge-emulation buffer */
    if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
        uint8_t *ebuf= s->edge_emu_buffer + 32;
        ff_emulated_edge_mc(ebuf            , ptr_y , wrap_y,16,16,mb_x*16,mb_y*16, s->width   , s->height);
        ptr_y= ebuf;
        ff_emulated_edge_mc(ebuf+18*wrap_y  , ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cb= ebuf+18*wrap_y;
        ff_emulated_edge_mc(ebuf+18*wrap_y+8, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
        ptr_cr= ebuf+18*wrap_y+8;
    }

    if (s->mb_intra) {
        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            /* frame vs. field DCT decision on the source pixels;
               the -400 biases the choice towards progressive */
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                              +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;

            if(progressive_score > 0){
                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    /* field DCT: bottom blocks start one line down, stride doubles */
                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
            s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        uint8_t *dest_y, *dest_cb, *dest_cr;

        dest_y  = s->dest[0];
        dest_cb = s->dest[1];
        dest_cr = s->dest[2];

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        /* build the prediction into s->dest; bidirectional averages the
           backward prediction into the forward one */
        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            /* same frame/field DCT decision, but on the residual */
            int progressive_score, interlaced_score;

            s->interlaced_dct=0;
            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                              +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;

            if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;

            if(progressive_score>0){
                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                  +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);

                if(progressive_score > interlaced_score){
                    s->interlaced_dct=1;

                    dct_offset= wrap_y;
                    wrap_y<<=1;
                }
            }
        }

        /* residual = source - prediction */
        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        /* low-variance MBs: skip blocks whose SAD against the prediction
           is small enough that they would quantize to nothing anyway */
        if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
            //FIXME optimize
            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.sad[1](NULL, ptr_cb              , dest_cb              , wrap_c, 8) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.sad[1](NULL, ptr_cr              , dest_cr              , wrap_c, 8) < 20*s->qscale) skip_dct[5]= 1;
        }
    }

    if(s->avctx->quantizer_noise_shaping){
        /* keep per-pixel weights and the unquantized blocks around for
           the refinement pass below */
        if(!skip_dct[0]) get_vissual_weight(weight[0], ptr_y                 , wrap_y);
        if(!skip_dct[1]) get_vissual_weight(weight[1], ptr_y              + 8, wrap_y);
        if(!skip_dct[2]) get_vissual_weight(weight[2], ptr_y + dct_offset    , wrap_y);
        if(!skip_dct[3]) get_vissual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
        if(!skip_dct[4]) get_vissual_weight(weight[4], ptr_cb                , wrap_c);
        if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
        memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
    }

    /* DCT & quantize */
    assert(s->out_format!=FMT_MJPEG || s->qscale==8);
    {
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->avctx->quantizer_noise_shaping){
            for(i=0;i<6;i++) {
                if(!skip_dct[i]){
                    s->block_last_index[i] = dct_quantize_refine(s, s->block[i], weight[i], orig[i], i, s->qscale);
                }
            }
        }

        /* drop cheap isolated coefficients in inter blocks */
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);

        if(s->flags & CODEC_FLAG_CBP_RD){
            for(i=0;i<6;i++) {
                if(s->block_last_index[i] == -1)
                    s->coded_score[i]= INT_MAX/256;
            }
        }
    }

    /* gray-only intra MBs still need valid chroma DC values */
    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= (1024 + s->c_dc_scale/2)/ s->c_dc_scale;
    }

    //non c quantize code returns incorrect block_last_index FIXME
    if(s->alternate_scan && s->dct_quantize != dct_quantize_c){
        for(i=0; i<6; i++){
            int j;
            if(s->block_last_index[i]>0){
                for(j=63; j>0; j--){
                    if(s->block[i][ s->intra_scantable.permutated[j] ]) break;
                }
                s->block_last_index[i]= j;
            }
        }
    }

    /* huffman encode */
    switch(s->codec_id){ //FIXME funct ptr could be slightly faster
    case CODEC_ID_MPEG1VIDEO:
    case CODEC_ID_MPEG2VIDEO:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MPEG4:
        mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MSMPEG4V2:
    case CODEC_ID_MSMPEG4V3:
    case CODEC_ID_WMV1:
        msmpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_WMV2:
         ff_wmv2_encode_mb(s, s->block, motion_x, motion_y); break;
#ifdef CONFIG_H261_ENCODER
    case CODEC_ID_H261:
        ff_h261_encode_mb(s, s->block, motion_x, motion_y); break;
#endif
    case CODEC_ID_H263:
    case CODEC_ID_H263P:
    case CODEC_ID_FLV1:
    case CODEC_ID_RV10:
    case CODEC_ID_RV20:
        h263_encode_mb(s, s->block, motion_x, motion_y); break;
    case CODEC_ID_MJPEG:
        mjpeg_encode_mb(s, s->block); break;
    default:
        assert(0);
    }
}
4492

    
4493
#endif //CONFIG_ENCODERS
4494

    
4495
/**
 * Flushes all decoder state: releases every held reference frame,
 * resets the macroblock position, and clears the bitstream parser.
 */
void ff_mpeg_flush(AVCodecContext *avctx){
    MpegEncContext *s = avctx->priv_data;
    int i;

    if (s == NULL || s->picture == NULL)
        return;

    /* release every internally or user allocated picture buffer */
    for (i = 0; i < MAX_PICTURE_COUNT; i++) {
        const int type = s->picture[i].type;
        if (s->picture[i].data[0] &&
            (type == FF_BUFFER_TYPE_INTERNAL || type == FF_BUFFER_TYPE_USER))
            avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
    }
    s->current_picture_ptr = NULL;
    s->last_picture_ptr    = NULL;
    s->next_picture_ptr    = NULL;

    s->mb_x = 0;
    s->mb_y = 0;

    /* reset the start-code parser */
    s->parse_context.state             = -1;
    s->parse_context.frame_start_found = 0;
    s->parse_context.overread          = 0;
    s->parse_context.overread_index    = 0;
    s->parse_context.index             = 0;
    s->parse_context.last_index        = 0;
    s->bitstream_buffer_size           = 0;
}
4519

    
4520
#ifdef CONFIG_ENCODERS
4521
/**
 * Copies `length` bits from src into the bit writer pb.
 *
 * Short payloads and unaligned destinations are copied as 16-bit words;
 * otherwise the destination is byte-aligned and the bulk is memcpy'd.
 */
void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
{
    const uint16_t *srcw= (uint16_t*)src;
    int words= length>>4;
    int bits= length&15;
    int i;

    if(length==0) return;

    /* the two original branches ran the identical word loop; merged */
    if(words < 16 || (put_bits_count(pb)&7)){
        for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
    }else{
        /* byte-align the destination, then copy whole bytes directly */
        for(i=0; put_bits_count(pb)&31; i++)
            put_bits(pb, 8, src[i]);
        flush_put_bits(pb);
        memcpy(pbBufPtr(pb), src+i, 2*words-i);
        skip_put_bytes(pb, 2*words-i);
    }

    /* trailing 1..15 bits; the guard avoids reading srcw[words] (two bytes
       past the supplied data) when length is a multiple of 16 */
    if(bits)
        put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
}
4544

    
4545
/**
 * Snapshot the encoder state that encode_mb() mutates, so a macroblock
 * can be (re-)encoded from a clean slate during the mb-type search.
 * Only the fields touched per-macroblock are copied, not the whole context.
 */
static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int ch;

    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 prediction state */
    d->mb_skip_run= s->mb_skip_run;
    for(ch=0; ch<3; ch++)
        d->last_dc[ch]= s->last_dc[ch];

    /* bit-usage statistics */
    d->mv_bits   = s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count   = s->i_count;
    d->f_count   = s->f_count;
    d->b_count   = s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits = s->misc_bits;
    d->last_bits = 0;

    /* per-MB coding state is reset, quantizer is carried over */
    d->mb_skipped= 0;
    d->qscale= s->qscale;
    d->dquant= s->dquant;
}
4570

    
4571
/**
 * Commit the encoder state produced by encode_mb() into context d
 * (the current "best candidate"): motion vectors, prediction state,
 * statistics, the bitstream writers and the coefficient blocks.
 * Counterpart of copy_context_before_encode().
 */
static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int ch;

    memcpy(d->mv,      s->mv,      2*4*2*sizeof(int));
    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 prediction state */
    d->mb_skip_run= s->mb_skip_run;
    for(ch=0; ch<3; ch++)
        d->last_dc[ch]= s->last_dc[ch];

    /* bit-usage statistics */
    d->mv_bits   = s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count   = s->i_count;
    d->f_count   = s->f_count;
    d->b_count   = s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits = s->misc_bits;

    /* coding decision for this macroblock */
    d->mb_intra  = s->mb_intra;
    d->mb_skipped= s->mb_skipped;
    d->mv_type   = s->mv_type;
    d->mv_dir    = s->mv_dir;

    /* bitstream writers (pb2/tex_pb only exist with data partitioning) */
    d->pb= s->pb;
    if(s->data_partitioning){
        d->pb2   = s->pb2;
        d->tex_pb= s->tex_pb;
    }

    d->block= s->block;
    for(ch=0; ch<6; ch++)
        d->block_last_index[ch]= s->block_last_index[ch];
    d->interlaced_dct= s->interlaced_dct;
    d->qscale= s->qscale;
}
4607

    
4608
/**
 * Encode one macroblock candidate of the given coding "type" into a
 * scratch bitstream, score it (bits only, or rate-distortion when
 * mb_decision == FF_MB_DECISION_RD), and keep it in *best if it beats
 * the best score so far (*dmin).
 *
 * pb/pb2/tex_pb (and s->blocks) are two-entry ping-pong buffers: index
 * *next_block holds the candidate being tried, the other index holds
 * the reigning best; *next_block is flipped when the candidate wins.
 */
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                           PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                           int *dmin, int *next_block, int motion_x, int motion_y)
{
    const int slot= *next_block;   /* not modified until after scoring */
    uint8_t *saved_dest[3];
    int score;

    copy_context_before_encode(s, backup, type);

    /* route this attempt into the scratch slot */
    s->block= s->blocks[slot];
    s->pb   = pb[slot];
    if(s->data_partitioning){
        s->pb2   = pb2   [slot];
        s->tex_pb= tex_pb[slot];
    }

    if(slot){
        /* reconstruct into the scratchpad so the reigning best
           reconstruction (s->dest) is not clobbered */
        memcpy(saved_dest, s->dest, sizeof(s->dest));
        s->dest[0] = s->rd_scratchpad;
        s->dest[1] = s->rd_scratchpad + 16*s->linesize;
        s->dest[2] = s->rd_scratchpad + 16*s->linesize + 8;
        assert(s->linesize >= 32); //FIXME
    }

    encode_mb(s, motion_x, motion_y);

    /* rate: total bits produced for this candidate */
    score= put_bits_count(&s->pb);
    if(s->data_partitioning){
        score+= put_bits_count(&s->pb2);
        score+= put_bits_count(&s->tex_pb);
    }

    if(s->avctx->mb_decision == FF_MB_DECISION_RD){
        /* distortion: reconstruct the MB and add lambda-weighted SSE */
        MPV_decode_mb(s, s->block);

        score *= s->lambda2;
        score += sse_mb(s) << FF_LAMBDA_SHIFT;
    }

    if(slot)
        memcpy(s->dest, saved_dest, sizeof(s->dest));

    if(score < *dmin){
        *dmin= score;
        *next_block^=1;   /* candidate won; swap ping-pong slots */

        copy_context_after_encode(best, s, type);
    }
}
4658

    
4659
static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
4660
    uint32_t *sq = squareTbl + 256;
4661
    int acc=0;
4662
    int x,y;
4663

    
4664
    if(w==16 && h==16)
4665
        return s->dsp.sse[0](NULL, src1, src2, stride, 16);
4666
    else if(w==8 && h==8)
4667
        return s->dsp.sse[1](NULL, src1, src2, stride, 8);
4668

    
4669
    for(y=