Statistics
| Branch: | Revision:

ffmpeg / libavcodec / h264.c @ 067ff8b1

History | View | Annotate | Download (148 KB)

1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 *
19
 */
20
 
21
/**
22
 * @file h264.c
23
 * H.264 / AVC / MPEG4 part10 codec.
24
 * @author Michael Niedermayer <michaelni@gmx.at>
25
 */
26

    
27
#include "common.h"
28
#include "dsputil.h"
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31
#include "h264data.h"
32
#include "golomb.h"
33

    
34
#undef NDEBUG
35
#include <assert.h>
36

    
37
/* Rename these identifiers for the remainder of the file; the replacement
 * names spell out why they should not be relied upon in this codec. */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Block indices used for the luma / chroma DC blocks. */
#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit counts associated with the static VLC tables declared below
 * (presumably the table lookup width -- confirm at the VLC init calls). */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/* Sizes of H264Context.sps_buffer and H264Context.pps_buffer. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* Size of H264Context.mmco. */
#define MAX_MMCO_COUNT 66
54

    
55
/**
 * Sequence parameter set.
 * Fields are named after the bitstream syntax elements they hold; where the
 * stored value differs from the coded one the mapping is given in the
 * trailing comment.
 */
typedef struct SPS{

    int profile_idc;
    int level_idc;
    int multiple_slice_groups;         ///< more_than_one_slice_group_allowed_flag
    int arbitrary_slice_order;         ///< arbitrary_slice_order_allowed_flag
    int redundant_slices;              ///< redundant_slices_allowed_flag
    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
    int poc_type;                      ///< pic_order_cnt_type
    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4 (NOTE(review): confirm whether + 4 is applied like log2_max_frame_num)
    int delta_pic_order_always_zero_flag;
    int offset_for_non_ref_pic;
    int offset_for_top_to_bottom_field;
    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
    int ref_frame_count;               ///< num_ref_frames
    int required_frame_num_update_behaviour_flag;
    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
    int frame_mbs_only_flag;
    int mb_aff;                        ///< mb_adaptive_frame_field_flag
    int direct_8x8_inference_flag;
    int vui_parameters_present_flag;
    int sar_width;
    int sar_height;
    short offset_for_ref_frame[256]; //FIXME dynamically allocate?
}SPS;
84

    
85
/**
 * Picture parameter set.
 * Fields are named after the bitstream syntax elements they hold; where the
 * stored value differs from the coded one the mapping is given in the
 * trailing comment.
 */
typedef struct PPS{
    int sps_id;
    int cabac;                  ///< entropy_coding_mode_flag
    int pic_order_present;      ///< pic_order_present_flag
    int slice_group_count;      ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;          ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                ///< pic_init_qp_minus26 + 26
    int init_qs;                ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
    /* NOTE(review): the frame_cropping_* syntax elements are part of the
     * sequence parameter set in the published H.264 syntax; confirm why they
     * are stored in the PPS here before relying on them. */
    int crop;                   ///< frame_cropping_flag
    int crop_left;              ///< frame_cropping_rect_left_offset
    int crop_right;             ///< frame_cropping_rect_right_offset
    int crop_top;               ///< frame_cropping_rect_top_offset
    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
}PPS;
109

    
110
/**
 * Memory management control operation opcode.
 * The enumerators are consecutive, starting at 0 for MMCO_END and ending at
 * 6 for MMCO_LONG.
 */
typedef enum MMCOOpcode{
    MMCO_END=0,          ///< 0: terminates the list of operations
    MMCO_SHORT2UNUSED,   ///< 1
    MMCO_LONG2UNUSED,    ///< 2
    MMCO_SHORT2LONG,     ///< 3
    MMCO_SET_MAX_LONG,   ///< 4
    MMCO_RESET,          ///< 5
    MMCO_LONG            ///< 6
} MMCOOpcode;
122

    
123
/**
124
 * Memory management control operation.
125
 */
126
typedef struct MMCO{
127
    MMCOOpcode opcode;
128
    int short_frame_num;
129
    int long_index;
130
} MMCO;
131

    
132
/**
133
 * H264Context
134
 */
135
typedef struct H264Context{
136
    MpegEncContext s;
137
    int nal_ref_idc;        
138
    int nal_unit_type;
139
#define NAL_SLICE                1
140
#define NAL_DPA                        2
141
#define NAL_DPB                        3
142
#define NAL_DPC                        4
143
#define NAL_IDR_SLICE                5
144
#define NAL_SEI                        6
145
#define NAL_SPS                        7
146
#define NAL_PPS                        8
147
#define NAL_PICTURE_DELIMITER        9
148
#define NAL_FILTER_DATA                10
149
    uint8_t *rbsp_buffer;
150
    int rbsp_buffer_size;
151

    
152
    int chroma_qp; //QPc
153

    
154
    int prev_mb_skiped; //FIXME remove (IMHO not used)
155

    
156
    //prediction stuff
157
    int chroma_pred_mode;
158
    int intra16x16_pred_mode;
159
    
160
    int8_t intra4x4_pred_mode_cache[5*8];
161
    int8_t (*intra4x4_pred_mode)[8];
162
    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
163
    void (*pred8x8  [4+3])(uint8_t *src, int stride);
164
    void (*pred16x16[4+3])(uint8_t *src, int stride);
165
    unsigned int topleft_samples_available;
166
    unsigned int top_samples_available;
167
    unsigned int topright_samples_available;
168
    unsigned int left_samples_available;
169

    
170
    /**
171
     * non zero coeff count cache.
172
     * is 64 if not available.
173
     */
174
    uint8_t non_zero_count_cache[6*8];
175
    uint8_t (*non_zero_count)[16];
176

    
177
    /**
178
     * Motion vector cache.
179
     */
180
    int16_t mv_cache[2][5*8][2];
181
    int8_t ref_cache[2][5*8];
182
#define LIST_NOT_USED -1 //FIXME rename?
183
#define PART_NOT_AVAILABLE -2
184
    
185
    /**
186
     * is 1 if the specific list MV&references are set to 0,0,-2.
187
     */
188
    int mv_cache_clean[2];
189

    
190
    int block_offset[16+8];
191
    int chroma_subblock_offset[16]; //FIXME remove
192
    
193
    uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
194
    uint16_t *mb2b8_xy;
195
    int b_stride;
196
    int b8_stride;
197

    
198
    SPS sps_buffer[MAX_SPS_COUNT];
199
    SPS sps; ///< current sps
200
    
201
    PPS pps_buffer[MAX_PPS_COUNT];
202
    /**
203
     * current pps
204
     */
205
    PPS pps; //FIXME move tp Picture perhaps? (->no) do we need that?
206

    
207
    int slice_num;
208
    uint8_t *slice_table_base;
209
    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
210
    int slice_type;
211
    int slice_type_fixed;
212
    
213
    //interlacing specific flags
214
    int mb_field_decoding_flag;
215
    
216
    int sub_mb_type[4];
217
    
218
    //POC stuff
219
    int poc_lsb;
220
    int poc_msb;
221
    int delta_poc_bottom;
222
    int delta_poc[2];
223
    int frame_num;
224
    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
225
    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
226
    int frame_num_offset;         ///< for POC type 2
227
    int prev_frame_num_offset;    ///< for POC type 2
228
    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
229

    
230
    /**
231
     * frame_num for frames or 2*frame_num for field pics.
232
     */
233
    int curr_pic_num;
234
    
235
    /**
236
     * max_frame_num or 2*max_frame_num for field pics.
237
     */
238
    int max_pic_num;
239

    
240
    //Weighted pred stuff
241
    int luma_log2_weight_denom;
242
    int chroma_log2_weight_denom;
243
    int luma_weight[2][16];
244
    int luma_offset[2][16];
245
    int chroma_weight[2][16][2];
246
    int chroma_offset[2][16][2];
247
   
248
    //deblock
249
    int disable_deblocking_filter_idc;
250
    int slice_alpha_c0_offset_div2;
251
    int slice_beta_offset_div2;
252
     
253
    int redundant_pic_count;
254
    
255
    int direct_spatial_mv_pred;
256

    
257
    /**
258
     * num_ref_idx_l0/1_active_minus1 + 1
259
     */
260
    int ref_count[2];// FIXME split for AFF
261
    Picture *short_ref[16];
262
    Picture *long_ref[16];
263
    Picture default_ref_list[2][32];
264
    Picture ref_list[2][32]; //FIXME size?
265
    Picture field_ref_list[2][32]; //FIXME size?
266
    
267
    /**
268
     * memory management control operations buffer.
269
     */
270
    MMCO mmco[MAX_MMCO_COUNT];
271
    int mmco_index;
272
    
273
    int long_ref_count;  ///< number of actual long term references
274
    int short_ref_count; ///< number of actual short term references
275
    
276
    //data partitioning
277
    GetBitContext intra_gb;
278
    GetBitContext inter_gb;
279
    GetBitContext *intra_gb_ptr;
280
    GetBitContext *inter_gb_ptr;
281
    
282
    DCTELEM mb[16*24] __align8;
283
}H264Context;
284

    
285
/* Static VLC tables, shared by every decoder instance in this file. */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];
static VLC run7_vlc;
293

    
294
/**
 * fill a rectangle.
 * Only the following (w*size) x h byte layouts are implemented:
 * 2x2, 2x4, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2 and 16x4; anything else hits
 * assert(0).
 * The rows are written with 16, 32 or 64 bit stores, so vp and stride*size
 * must keep every row suitably aligned for the store width that is used.
 * @param vp pointer to the top left element of the rectangle
 * @param w width of the rectangle in elements, should be a constant
 * @param h height of the rectangle, should be a constant
 * @param stride distance between two rows, in elements
 * @param val the value to store; for size 1 it is replicated into every byte
 *            by multiplication, so it must fit into 8 bits
 * @param size the size of val (1 or 4), should be a constant
 */
static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);

    /* from here on w and stride are measured in bytes */
    w      *= size;
    stride *= size;

//FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it
    if(w==2 && h==2){
        *(uint16_t*)(p + 0)=
        *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
    }else if(w==2 && h==4){
        *(uint16_t*)(p + 0*stride)=
        *(uint16_t*)(p + 1*stride)=
        *(uint16_t*)(p + 2*stride)=
        *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
    }else if(w==4 && h==2){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
    }else if(w==4 && h==4){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)=
        *(uint32_t*)(p + 2*stride)=
        *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==1){
        *(uint32_t*)(p + 0)=
        *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==2){
        *(uint32_t*)(p + 0 + 0*stride)=
        *(uint32_t*)(p + 4 + 0*stride)=
        *(uint32_t*)(p + 0 + 1*stride)=
        *(uint32_t*)(p + 4 + 1*stride)=  size==4 ? val : val*0x01010101;
    }else if(w==8 && h==4){
        /* size 4: two copies of val per 64 bit word, size 1: eight copies */
        *(uint64_t*)(p + 0*stride)=
        *(uint64_t*)(p + 1*stride)=
        *(uint64_t*)(p + 2*stride)=
        *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==2){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==4){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)=
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
        *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else
        assert(0);
}
354

    
355
/**
 * fills the per macroblock caches from the already decoded neighbours.
 * For intra macroblocks the *_samples_available masks and (for intra 4x4)
 * the border of intra4x4_pred_mode_cache are set up; the border of
 * non_zero_count_cache is always set up (64 marks an unavailable neighbour);
 * for inter macroblocks the borders of mv_cache and ref_cache are set up for
 * every list that is used.
 * A neighbour counts as available only if slice_table marks it as part of
 * the current slice (slice_num).
 * @param h the decoder context, s.mb_x / s.mb_y select the macroblock
 * @param mb_type type of the current macroblock
 */
static inline void fill_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[4];
    int i;

    //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it

    /* Frame neighbours. They are computed unconditionally so that none of
     * the indices below is ever read uninitialised, even when MBAFF (which
     * is not implemented yet) is signalled. */
    topleft_xy = mb_xy-1 - s->mb_stride;
    top_xy     = mb_xy   - s->mb_stride;
    topright_xy= mb_xy+1 - s->mb_stride;
    left_xy[0]   = mb_xy-1;
    left_xy[1]   = mb_xy-1;
    left_block[0]= 0;
    left_block[1]= 1;
    left_block[2]= 2;
    left_block[3]= 3;

    if(h->sps.mb_aff){
    //FIXME MBAFF specific neighbour derivation is missing, the frame neighbours above are used instead
    }

    /* a type of 0 means "not available" (different slice) */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        /* a non intra neighbour is unusable if it is unavailable or if
         * constrained intra prediction is enabled */
        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                /* bottom row of the top macroblock, see write_back_intra_pred_mode() */
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
                    pred= 2;
                else{
                    pred= -1;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    /* right column of the left macroblock */
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
                        pred= 2;
                    else{
                        pred= -1;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    }else{
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= 64;
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
        h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
        h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
    }else{
        h->non_zero_count_cache[3+8*1]=
        h->non_zero_count_cache[3+8*2]=
        h->non_zero_count_cache[0+8*1]=
        h->non_zero_count_cache[0+8*4]= 64;
    }

    if(left_type[1]){
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
        h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
        h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
    }else{
        h->non_zero_count_cache[3+8*3]=
        h->non_zero_count_cache[3+8*4]=
        h->non_zero_count_cache[0+8*2]=
        h->non_zero_count_cache[0+8*5]= 64;
    }

#if 1
    if(IS_INTER(mb_type)){
        int list;
        for(list=0; list<2; list++){
            if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue; //FIXME direct mode ...
            }
            h->mv_cache_clean[list]= 0;

            /* For every neighbour: copy MV and reference if it is inter,
             * otherwise store a zero MV and mark the reference as
             * LIST_NOT_USED (available but not inter) or PART_NOT_AVAILABLE. */
            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                /* the same marker replicated into the 4 int8_t entries with one store */
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            //FIXME unify cleanup or sth
            if(IS_INTER(left_type[0])){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(IS_INTER(left_type[1])){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
                /* the marker of the lower half must come from left_type[1] */
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[1] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            /* cache positions which never hold a usable neighbour */
            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
        }
//FIXME

    }
#endif
}
601

    
602
/**
 * stores the intra 4x4 prediction modes of the rightmost column
 * (entries 0..3) and of the bottom row without its last block (entries 4..6)
 * of the current macroblock from the cache into intra4x4_pred_mode, where
 * fill_caches() picks them up for the following macroblocks.
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    /* rightmost column, top to bottom */
    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    /* bottom row, left to right */
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
}
614

    
615
/**
616
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
617
 */
618
static inline int check_intra4x4_pred_mode(H264Context *h){
619
    MpegEncContext * const s = &h->s;
620
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
621
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
622
    int i;
623
    
624
    if(!(h->top_samples_available&0x8000)){
625
        for(i=0; i<4; i++){
626
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
627
            if(status<0){
628
                fprintf(stderr, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
629
                return -1;
630
            } else if(status){
631
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
632
            }
633
        }
634
    }
635
    
636
    if(!(h->left_samples_available&0x8000)){
637
        for(i=0; i<4; i++){
638
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
639
            if(status<0){
640
                fprintf(stderr, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
641
                return -1;
642
            } else if(status){
643
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
644
            }
645
        }
646
    }
647

    
648
    return 0;
649
} //FIXME cleanup like next
650

    
651
/**
652
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
653
 */
654
static inline int check_intra_pred_mode(H264Context *h, int mode){
655
    MpegEncContext * const s = &h->s;
656
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
657
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
658
    
659
    if(!(h->top_samples_available&0x8000)){
660
        mode= top[ mode ];
661
        if(mode<0){
662
            fprintf(stderr, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
663
            return -1;
664
        }
665
    }
666
    
667
    if(!(h->left_samples_available&0x8000)){
668
        mode= left[ mode ];
669
        if(mode<0){
670
            fprintf(stderr, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
671
            return -1;
672
        } 
673
    }
674

    
675
    return mode;
676
}
677

    
678
/**
679
 * gets the predicted intra4x4 prediction mode.
680
 */
681
static inline int pred_intra_mode(H264Context *h, int n){
682
    const int index8= scan8[n];
683
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
684
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
685
    const int min= FFMIN(left, top);
686

    
687
    tprintf("mode:%d %d min:%d\n", left ,top, min);
688

    
689
    if(min<0) return DC_PRED;
690
    else      return min;
691
}
692

    
693
/**
 * stores the non zero coefficient counts on the bottom and right borders of
 * the current macroblock from the cache into non_zero_count (entries 0..12),
 * where fill_caches() picks them up for the following macroblocks.
 */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    /* cache columns 4..7: bottom row left to right, then the right column upwards */
    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[6+8*4];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[7+8*1];

    /* cache columns 1..2, rows 1..2 */
    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[2+8*1];

    /* cache columns 1..2, rows 4..5 */
    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[2+8*4];
}
713

    
714
/**
715
 * gets the predicted number of non zero coefficients.
716
 * @param n block index
717
 */
718
static inline int pred_non_zero_count(H264Context *h, int n){
719
    const int index8= scan8[n];
720
    const int left= h->non_zero_count_cache[index8 - 1];
721
    const int top = h->non_zero_count_cache[index8 - 8];
722
    int i= left + top;
723
    
724
    if(i<64) i= (i+1)>>1;
725

    
726
    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
727

    
728
    return i&31;
729
}
730

    
731
/**
 * fetches the diagonal neighbour used for motion vector prediction.
 * The top right neighbour (one row up, part_width entries to the right) is
 * used if its reference is not PART_NOT_AVAILABLE, otherwise the top left
 * neighbour is used instead.
 * @param C is set to point to the motion vector of the chosen neighbour
 * @param i position of the current block in mv_cache / ref_cache
 * @param list the reference list
 * @param part_width width of the partition in cache entries
 * @return the reference index of the chosen neighbour
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];

    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf("topright MV not available\n");

        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}
744

    
745
/**
746
 * gets the predicted MV.
747
 * @param n the block index
748
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
749
 * @param mx the x component of the predicted motion vector
750
 * @param my the y component of the predicted motion vector
751
 */
752
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
753
    const int index8= scan8[n];
754
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
755
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
756
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
757
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
758
    const int16_t * C;
759
    int diagonal_ref, match_count;
760

    
761
    assert(part_width==1 || part_width==2 || part_width==4);
762

    
763
/* mv_cache
764
  B . . A T T T T 
765
  U . . L . . , .
766
  U . . L . . . .
767
  U . . L . . , .
768
  . . . L . . . .
769
*/
770

    
771
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
772
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
773
    
774
    if(match_count > 1){ //most common
775
        *mx= mid_pred(A[0], B[0], C[0]);
776
        *my= mid_pred(A[1], B[1], C[1]);
777
    }else if(match_count==1){
778
        if(left_ref==ref){
779
            *mx= A[0];
780
            *my= A[1];        
781
        }else if(top_ref==ref){
782
            *mx= B[0];
783
            *my= B[1];        
784
        }else{
785
            *mx= C[0];
786
            *my= C[1];        
787
        }
788
    }else{
789
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
790
            *mx= A[0];
791
            *my= A[1];        
792
        }else{
793
            *mx= mid_pred(A[0], B[0], C[0]);
794
            *my= mid_pred(A[1], B[1], C[1]);
795
        }
796
    }
797
        
798
    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
799
}
800

    
801
/**
802
 * gets the directionally predicted 16x8 MV.
803
 * @param n the block index
804
 * @param mx the x component of the predicted motion vector
805
 * @param my the y component of the predicted motion vector
806
 */
807
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
808
    if(n==0){
809
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
810
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
811

    
812
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
813
        
814
        if(top_ref == ref){
815
            *mx= B[0];
816
            *my= B[1];
817
            return;
818
        }
819
    }else{
820
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
821
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
822
        
823
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
824

    
825
        if(left_ref == ref){
826
            *mx= A[0];
827
            *my= A[1];
828
            return;
829
        }
830
    }
831

    
832
    //RARE
833
    pred_motion(h, n, 4, list, ref, mx, my);
834
}
835

    
836
/**
837
 * gets the directionally predicted 8x16 MV.
838
 * @param n the block index
839
 * @param mx the x component of the predicted motion vector
840
 * @param my the y component of the predicted motion vector
841
 */
842
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
843
    if(n==0){
844
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
845
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
846
        
847
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
848

    
849
        if(left_ref == ref){
850
            *mx= A[0];
851
            *my= A[1];
852
            return;
853
        }
854
    }else{
855
        const int16_t * C;
856
        int diagonal_ref;
857

    
858
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
859
        
860
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
861

    
862
        if(diagonal_ref == ref){ 
863
            *mx= C[0];
864
            *my= C[1];
865
            return;
866
        }
867
    }
868

    
869
    //RARE
870
    pred_motion(h, n, 2, list, ref, mx, my);
871
}
872

    
873
/**
 * gets the MV used for a skipped macroblock in list 0.
 * The vector is zero if the top or left neighbour is not available, or if
 * either of them uses reference 0 with a zero motion vector; otherwise it is
 * the pred_motion() prediction for the whole macroblock with reference 0.
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_pos = scan8[0] - 8;
    const int left_pos= scan8[0] - 1;
    const int top_ref = h->ref_cache[0][ top_pos ];
    const int left_ref= h->ref_cache[0][ left_pos ];
    int zero_mv;

    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    zero_mv= top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE;
    if(!zero_mv) //both vector components are compared with 0 at once
        zero_mv= top_ref == 0 && *(uint32_t*)h->mv_cache[0][ top_pos ] == 0;
    if(!zero_mv)
        zero_mv= left_ref == 0 && *(uint32_t*)h->mv_cache[0][ left_pos ] == 0;

    if(zero_mv){
        *my= 0;
        *mx= 0;
        return;
    }

    pred_motion(h, 0, 4, 0, 0, mx, my);
}
891

    
892
/**
 * copies the motion vectors and reference indexes of the current macroblock
 * from the caches into current_picture.
 * For a list the macroblock does not use (and which is not 8x8 partitioned)
 * the motion vectors are zeroed and the reference indexes are set to
 * LIST_NOT_USED.
 * @param mb_type type of the current macroblock
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    int list;

    for(list=0; list<2; list++){
        int y;
        if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
            //FIXME skip or never read if mb_type doesnt use it
            for(y=0; y<4; y++){
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
            }
            //the "not used" marker belongs into ref_index (2x2 entries at
            //b8_xy), not into motion_val which is indexed by b_xy
            for(y=0; y<2; y++){
                s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
                s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
            }
            continue; //FIXME direct mode ...
        }

        for(y=0; y<4; y++){
            //each 64 bit copy moves two neighbouring motion vectors
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        for(y=0; y<2; y++){
            s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
            s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
        }
    }
}
923

    
924
/**
925
 * Decodes a network abstraction layer unit.
926
 * @param consumed is the number of bytes used as input
927
 * @param length is the length of the array
928
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp ttailing?
929
 * @returns decoded bytes, might be src+1 if no escapes 
930
 */
931
static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
932
    int i, si, di;
933
    uint8_t *dst;
934

    
935
//    src[0]&0x80;                //forbidden bit
936
    h->nal_ref_idc= src[0]>>5;
937
    h->nal_unit_type= src[0]&0x1F;
938

    
939
    src++; length--;
940
#if 0    
941
    for(i=0; i<length; i++)
942
        printf("%2X ", src[i]);
943
#endif
944
    for(i=0; i+1<length; i+=2){
945
        if(src[i]) continue;
946
        if(i>0 && src[i-1]==0) i--;
947
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
948
            if(src[i+2]!=3){
949
                /* startcode, so we must be past the end */
950
                length=i;
951
            }
952
            break;
953
        }
954
    }
955

    
956
    if(i>=length-1){ //no escaped 0
957
        *dst_length= length;
958
        *consumed= length+1; //+1 for the header
959
        return src; 
960
    }
961

    
962
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
963
    dst= h->rbsp_buffer;
964

    
965
//printf("deoding esc\n");
966
    si=di=0;
967
    while(si<length){ 
968
        //remove escapes (very rare 1:2^22)
969
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
970
            if(src[si+2]==3){ //escape
971
                dst[di++]= 0;
972
                dst[di++]= 0;
973
                si+=3;
974
            }else //next start code
975
                break;
976
        }
977

    
978
        dst[di++]= src[si++];
979
    }
980

    
981
    *dst_length= di;
982
    *consumed= si + 1;//+1 for the header
983
//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
984
    return dst;
985
}
986

    
987
/**
988
 * @param src the data which should be escaped
989
 * @param dst the target buffer, dst+1 == src is allowed as a special case
990
 * @param length the length of the src data
991
 * @param dst_length the length of the dst array
992
 * @returns length of escaped data in bytes or -1 if an error occured
993
 */
994
static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
995
    int i, escape_count, si, di;
996
    uint8_t *temp;
997
    
998
    assert(length>=0);
999
    assert(dst_length>0);
1000
    
1001
    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1002

    
1003
    if(length==0) return 1;
1004

    
1005
    escape_count= 0;
1006
    for(i=0; i<length; i+=2){
1007
        if(src[i]) continue;
1008
        if(i>0 && src[i-1]==0) 
1009
            i--;
1010
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1011
            escape_count++;
1012
            i+=2;
1013
        }
1014
    }
1015
    
1016
    if(escape_count==0){ 
1017
        if(dst+1 != src)
1018
            memcpy(dst+1, src, length);
1019
        return length + 1;
1020
    }
1021
    
1022
    if(length + escape_count + 1> dst_length)
1023
        return -1;
1024

    
1025
    //this should be damn rare (hopefully)
1026

    
1027
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1028
    temp= h->rbsp_buffer;
1029
//printf("encoding esc\n");
1030
    
1031
    si= 0;
1032
    di= 0;
1033
    while(si < length){
1034
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1035
            temp[di++]= 0; si++;
1036
            temp[di++]= 0; si++;
1037
            temp[di++]= 3; 
1038
            temp[di++]= src[si++];
1039
        }
1040
        else
1041
            temp[di++]= src[si++];
1042
    }
1043
    memcpy(dst+1, temp, length+escape_count);
1044
    
1045
    assert(di == length+escape_count);
1046
    
1047
    return di + 1;
1048
}
1049

    
1050
/**
1051
 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1052
 */
1053
static void encode_rbsp_trailing(PutBitContext *pb){
1054
    int length;
1055
    put_bits(pb, 1, 1);
1056
    length= (-get_bit_count(pb))&7;
1057
    if(length) put_bits(pb, length, 0);
1058
}
1059

    
1060
/**
 * identifies the exact end of the bitstream
 * The byte at src is searched from its least significant bit upwards for the
 * first set bit.
 * @return the length of the trailing (1 based position of that bit), or 0 if
 *         damaged (no bit set)
 */
static int decode_rbsp_trailing(uint8_t *src){
    int bits= *src;
    int len= 1;

    tprintf("rbsp trailing %X\n", bits);

    while(len < 9){
        if(bits & 1)
            return len;
        bits>>= 1;
        len++;
    }
    return 0;
}
1076

    
1077
/**
1078
 * idct tranforms the 16 dc values and dequantize them.
1079
 * @param qp quantization parameter
1080
 */
1081
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1082
    const int qmul= dequant_coeff[qp][0];
1083
#define stride 16
1084
    int i;
1085
    int temp[16]; //FIXME check if this is a good idea
1086
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1087
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1088

    
1089
//memset(block, 64, 2*256);
1090
//return;
1091
    for(i=0; i<4; i++){
1092
        const int offset= y_offset[i];
1093
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1094
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1095
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1096
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1097

    
1098
        temp[4*i+0]= z0+z3;
1099
        temp[4*i+1]= z1+z2;
1100
        temp[4*i+2]= z1-z2;
1101
        temp[4*i+3]= z0-z3;
1102
    }
1103

    
1104
    for(i=0; i<4; i++){
1105
        const int offset= x_offset[i];
1106
        const int z0= temp[4*0+i] + temp[4*2+i];
1107
        const int z1= temp[4*0+i] - temp[4*2+i];
1108
        const int z2= temp[4*1+i] - temp[4*3+i];
1109
        const int z3= temp[4*1+i] + temp[4*3+i];
1110

    
1111
        block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1112
        block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1113
        block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1114
        block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1115
    }
1116
}
1117

    
1118
/**
1119
 * dct tranforms the 16 dc values.
1120
 * @param qp quantization parameter ??? FIXME
1121
 */
1122
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1123
//    const int qmul= dequant_coeff[qp][0];
1124
    int i;
1125
    int temp[16]; //FIXME check if this is a good idea
1126
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1127
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1128

    
1129
    for(i=0; i<4; i++){
1130
        const int offset= y_offset[i];
1131
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1132
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1133
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1134
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1135

    
1136
        temp[4*i+0]= z0+z3;
1137
        temp[4*i+1]= z1+z2;
1138
        temp[4*i+2]= z1-z2;
1139
        temp[4*i+3]= z0-z3;
1140
    }
1141

    
1142
    for(i=0; i<4; i++){
1143
        const int offset= x_offset[i];
1144
        const int z0= temp[4*0+i] + temp[4*2+i];
1145
        const int z1= temp[4*0+i] - temp[4*2+i];
1146
        const int z2= temp[4*1+i] - temp[4*3+i];
1147
        const int z3= temp[4*1+i] + temp[4*3+i];
1148

    
1149
        block[stride*0 +offset]= (z0 + z3)>>1;
1150
        block[stride*2 +offset]= (z1 + z2)>>1;
1151
        block[stride*8 +offset]= (z1 - z2)>>1;
1152
        block[stride*10+offset]= (z0 - z3)>>1;
1153
    }
1154
}
1155
#undef xStride
1156
#undef stride
1157

    
1158
/**
 * idct transforms the 2x2 chroma dc values and dequantizes them.
 * The 4 values sit at block[0], block[16], block[32] and block[48] and are
 * replaced in place.
 * @param qp quantization parameter
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
    const int qmul= dequant_coeff[qp][0];
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const top= block;
    DCTELEM * const bot= block + stride;
    const int top_sum = top[0] + top[xStride];
    const int top_diff= top[0] - top[xStride];
    const int bot_sum = bot[0] + bot[xStride];
    const int bot_diff= bot[0] - bot[xStride];

    top[0      ]= ((top_sum  + bot_sum )*qmul)>>1;
    top[xStride]= ((top_diff + bot_diff)*qmul)>>1;
    bot[0      ]= ((top_sum  - bot_sum )*qmul)>>1;
    bot[xStride]= ((top_diff - bot_diff)*qmul)>>1;
}
1179

    
1180
/**
 * dct transforms the 2x2 chroma dc values.
 * The 4 values sit at block[0], block[16], block[32] and block[48] and are
 * replaced in place, without any scaling.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const top= block;
    DCTELEM * const bot= block + stride;
    const int top_sum = top[0] + top[xStride];
    const int top_diff= top[0] - top[xStride];
    const int bot_sum = bot[0] + bot[xStride];
    const int bot_diff= bot[0] - bot[xStride];

    top[0      ]= top_sum  + bot_sum;
    top[xStride]= top_diff + bot_diff;
    bot[0      ]= top_sum  - bot_sum;
    bot[xStride]= top_diff - bot_diff;
}
1200

    
1201
/**
1202
 * gets the chroma qp.
1203
 */
1204
static inline int get_chroma_qp(H264Context *h, int qscale){
1205
    
1206
    return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
1207
}
1208

    
1209

    
1210
/**
1211
 *
1212
 */
1213
static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
1214
    int i;
1215
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
1216

    
1217
    block[0] += 32;
1218
#if 1
1219
    for(i=0; i<4; i++){
1220
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1221
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1222
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1223
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1224

    
1225
        block[i + 4*0]= z0 + z3;
1226
        block[i + 4*1]= z1 + z2;
1227
        block[i + 4*2]= z1 - z2;
1228
        block[i + 4*3]= z0 - z3;
1229
    }
1230

    
1231
    for(i=0; i<4; i++){
1232
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1233
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1234
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1235
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1236

    
1237
        dst[0 + i*stride]= cm[ dst[0 + i*stride] + ((z0 + z3) >> 6) ];
1238
        dst[1 + i*stride]= cm[ dst[1 + i*stride] + ((z1 + z2) >> 6) ];
1239
        dst[2 + i*stride]= cm[ dst[2 + i*stride] + ((z1 - z2) >> 6) ];
1240
        dst[3 + i*stride]= cm[ dst[3 + i*stride] + ((z0 - z3) >> 6) ];
1241
    }
1242
#else
1243
    for(i=0; i<4; i++){
1244
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1245
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1246
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1247
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1248

    
1249
        block[0 + 4*i]= z0 + z3;
1250
        block[1 + 4*i]= z1 + z2;
1251
        block[2 + 4*i]= z1 - z2;
1252
        block[3 + 4*i]= z0 - z3;
1253
    }
1254

    
1255
    for(i=0; i<4; i++){
1256
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1257
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1258
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1259
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1260

    
1261
        dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
1262
        dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
1263
        dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
1264
        dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
1265
    }
1266
#endif
1267
}
1268

    
1269
/**
 * dct transforms the difference of two 4x4 pixel blocks.
 * @param block receives the 16 coefficients
 * @param src1 the pixels which are subtracted from
 * @param src2 the pixels which are subtracted
 * @param stride distance between 2 lines, used for both src1 and src2
 */
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
    int i;
    //FIXME try int temp instead of block

    //horizontal pass over the pixel differences of each line
    for(i=0; i<4; i++){
        const uint8_t * const a= src1 + i*stride;
        const uint8_t * const b= src2 + i*stride;
        DCTELEM * const row= block + 4*i;
        const int d0= a[0] - b[0];
        const int d1= a[1] - b[1];
        const int d2= a[2] - b[2];
        const int d3= a[3] - b[3];
        const int outer_sum = d0 + d3;
        const int outer_diff= d0 - d3;
        const int inner_sum = d1 + d2;
        const int inner_diff= d1 - d2;

        row[0]=   outer_sum  +   inner_sum;
        row[1]= 2*outer_diff +   inner_diff;
        row[2]=   outer_sum  -   inner_sum;
        row[3]=   outer_diff - 2*inner_diff;
    }

    //vertical pass, in place
    for(i=0; i<4; i++){
        DCTELEM * const col= block + i;
        const int outer_sum = col[0*4] + col[3*4];
        const int outer_diff= col[0*4] - col[3*4];
        const int inner_sum = col[1*4] + col[2*4];
        const int inner_diff= col[1*4] - col[2*4];

        col[0*4]=   outer_sum  +   inner_sum;
        col[1*4]= 2*outer_diff +   inner_diff;
        col[2*4]=   outer_sum  -   inner_sum;
        col[3*4]=   outer_diff - 2*inner_diff;
    }
}
1301

    
1302
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, iam not sure, its very close
1303
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1304
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1305
    int i;
1306
    const int * const quant_table= quant_coeff[qscale];
1307
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1308
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1309
    const unsigned int threshold2= (threshold1<<1);
1310
    int last_non_zero;
1311

    
1312
    if(seperate_dc){
1313
        if(qscale<=18){
1314
            //avoid overflows
1315
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1316
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1317
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1318

    
1319
            int level= block[0]*quant_coeff[qscale+18][0];
1320
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1321
                if(level>0){
1322
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
1323
                    block[0]= level;
1324
                }else{
1325
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
1326
                    block[0]= -level;
1327
                }
1328
//                last_non_zero = i;
1329
            }else{
1330
                block[0]=0;
1331
            }
1332
        }else{
1333
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1334
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1335
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1336

    
1337
            int level= block[0]*quant_table[0];
1338
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1339
                if(level>0){
1340
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
1341
                    block[0]= level;
1342
                }else{
1343
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
1344
                    block[0]= -level;
1345
                }
1346
//                last_non_zero = i;
1347
            }else{
1348
                block[0]=0;
1349
            }
1350
        }
1351
        last_non_zero= 0;
1352
        i=1;
1353
    }else{
1354
        last_non_zero= -1;
1355
        i=0;
1356
    }
1357

    
1358
    for(; i<16; i++){
1359
        const int j= scantable[i];
1360
        int level= block[j]*quant_table[j];
1361

    
1362
//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1363
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1364
        if(((unsigned)(level+threshold1))>threshold2){
1365
            if(level>0){
1366
                level= (bias + level)>>QUANT_SHIFT;
1367
                block[j]= level;
1368
            }else{
1369
                level= (bias - level)>>QUANT_SHIFT;
1370
                block[j]= -level;
1371
            }
1372
            last_non_zero = i;
1373
        }else{
1374
            block[j]=0;
1375
        }
1376
    }
1377

    
1378
    return last_non_zero;
1379
}
1380

    
1381
/**
 * 4x4 vertical prediction: the 4 pixels above the block are copied into each
 * of its 4 lines. Uses 32 bit accesses on src.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row= *(uint32_t*)(src - stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= top_row;
}
1388

    
1389
/**
 * 4x4 horizontal prediction: each line is filled with the pixel left of it.
 * Uses 32 bit stores on src.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++){
        //unsigned multiply: the pixel is promoted to int and pixel*0x01010101
        //would overflow a signed int for pixels >= 128
        *(uint32_t*)(src + y*stride)= src[-1 + y*stride]*0x01010101U;
    }
}
1395

    
1396
/**
 * 4x4 dc prediction: the block is filled with the rounded mean of the 4
 * pixels above and the 4 pixels left of it. Uses 32 bit stores on src.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int sum=   src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride];
    //unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128
    const uint32_t dc= ((sum + 4)>>3)*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= dc;
}
1405

    
1406
/**
 * 4x4 dc prediction from the left edge only: the block is filled with the
 * rounded mean of the 4 pixels left of it. Uses 32 bit stores on src.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int sum= src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride];
    //unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128
    const uint32_t dc= ((sum + 2)>>2)*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= dc;
}
1414

    
1415
/**
 * 4x4 dc prediction from the top edge only: the block is filled with the
 * rounded mean of the 4 pixels above it. Uses 32 bit stores on src.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int sum= src[-stride] + src[1-stride] + src[2-stride] + src[3-stride];
    //unsigned multiply: dc*0x01010101 overflows a signed int for dc >= 128
    const uint32_t dc= ((sum + 2)>>2)*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= dc;
}
1423

    
1424
/**
 * 4x4 dc prediction without neighbours: the block is filled with 128.
 * Uses 32 bit stores on src.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t grey= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= grey;
}
1430

    
1431

    
1432
/* Edge loaders for the 4x4 predictors. Each one expands to 4 declarations and
 * needs src and stride (LOAD_TOP_RIGHT_EDGE: topright) to be in scope. */

/* t0..t3: the 4 pixels above the block */
#define LOAD_TOP_EDGE\
    const int t0= src[ 0-1*stride];\
    const int t1= src[ 1-1*stride];\
    const int t2= src[ 2-1*stride];\
    const int t3= src[ 3-1*stride];

/* t4..t7: the 4 pixels at topright */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

/* l0..l3: the 4 pixels left of the block */
#define LOAD_LEFT_EDGE\
    const int l0= src[-1+0*stride];\
    const int l1= src[-1+1*stride];\
    const int l2= src[-1+2*stride];\
    const int l3= src[-1+3*stride];

    
1450
/**
 * 4x4 diagonal down-right prediction.
 * Every pixel is the (1,2,1)/4 filtered value of the edge (left column,
 * top-left corner, top row) at the position selected by x-y.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    int edge[9]; //l3 l2 l1 l0 lt t0 t1 t2 t3
    int i, x, y;

    //the whole edge is read before the first pixel is written
    for(i=0; i<4; i++){
        edge[3-i]= src[-1 + i*stride];
        edge[5+i]= src[ i - stride];
    }
    edge[4]= src[-1 - stride];

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int c= x - y + 4; //center tap
            src[x + y*stride]= (edge[c-1] + 2*edge[c] + edge[c+1] + 2)>>2;
        }
    }
}
1472

    
1473
/**
 * 4x4 diagonal down-left prediction.
 * Every pixel is the (1,2,1)/4 filtered value of the 8 pixels above and
 * above-right of the block at the position selected by x+y; the last pixel
 * uses t7 twice.
 * @param topright the 4 pixels above-right of the block
 * @param stride distance between 2 lines
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int t[9]; //t0..t7, t7 repeated
    int i, x, y;

    //the whole edge is read before the first pixel is written
    for(i=0; i<4; i++){
        t[i  ]= src[i - stride];
        t[i+4]= topright[i];
    }
    t[8]= t[7]; //makes the last pixel (t6 + 3*t7 + 2)>>2

    for(y=0; y<4; y++){
        for(x=0; x<4; x++){
            const int k= x + y;
            src[x + y*stride]= (t[k] + 2*t[k+1] + t[k+2] + 2)>>2;
        }
    }
}
1495

    
1496
/**
 * 4x4 vertical-right prediction from the top-left corner, the 4 pixels above
 * and the upper 3 pixels left of the block.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    //only the pixels which are used are loaded (the 4th left one is not)
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];

    //lines 0 and 2: 2 tap averages along the top edge
    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;

    //lines 1 and 3: 3 tap filter along the top edge
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;

    //first column of lines 2 and 3: 3 tap filter along the left edge
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}
1519

    
1520
/**
 * 4x4 vertical-left prediction from the 4 pixels above the block and the
 * first 3 pixels at topright.
 * @param topright the pixels above-right of the block
 * @param stride distance between 2 lines
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    //only the pixels which are used are loaded (topright[3] is not)
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int t3= src[ 3-1*stride];
    const int t4= topright[0];
    const int t5= topright[1];
    const int t6= topright[2];

    //lines 0 and 2: 2 tap averages
    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4 + 1)>>1;
    src[3+2*stride]=(t4 + t5 + 1)>>1;

    //lines 1 and 3: 3 tap filter
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
1542

    
1543
/**
 * 4x4 horizontal-up prediction from the 4 pixels left of the block.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    //pixels with the same x+2*y share a value
    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;

    //the rest is filled with the last left pixel
    src[2+2*stride]=
    src[3+2*stride]=
    src[0+3*stride]=
    src[1+3*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
1563
    
1564
/**
 * 4x4 horizontal-down prediction from the top-left corner, the 4 pixels left
 * and the first 3 pixels above the block.
 * @param topright unused
 * @param stride distance between 2 lines
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    //only the pixels which are used are loaded (the 4th top one is not)
    const int lt= src[-1-1*stride];
    const int t0= src[ 0-1*stride];
    const int t1= src[ 1-1*stride];
    const int t2= src[ 2-1*stride];
    const int l0= src[-1+0*stride];
    const int l1= src[-1+1*stride];
    const int l2= src[-1+2*stride];
    const int l3= src[-1+3*stride];

    //pixels with the same 2*y-x share a value
    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2 + 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
1587

    
1588
/**
 * 16x16 vertical prediction: the 16 pixels above the block are copied into
 * each of its 16 lines. Uses 32 bit accesses on src.
 * @param stride distance between 2 lines
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t * const top= (uint32_t*)(src - stride);
    const uint32_t w0= top[0];
    const uint32_t w1= top[1];
    const uint32_t w2= top[2];
    const uint32_t w3= top[3];
    int y;

    for(y=0; y<16; y++){
        uint32_t * const row= (uint32_t*)(src + y*stride);

        row[0]= w0;
        row[1]= w1;
        row[2]= w2;
        row[3]= w3;
    }
}
1602

    
1603
/**
 * 16x16 intra prediction, horizontal mode: every row of the block is filled
 * with the sample immediately to the left of that row.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);
        /* unsigned multiply: 255*0x01010101 does not fit into a signed int */
        const uint32_t v= src[-1+i*stride]*0x01010101U;

        row[0]=
        row[1]=
        row[2]=
        row[3]= v;
    }
}
1613

    
1614
/**
 * 16x16 intra prediction, DC mode: fills the block with the rounded mean of
 * the 16 samples to the left and the 16 samples above.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0; i<16; i++){
        sum+= src[-1+i*stride];     /* left column */
    }

    for(i=0; i<16; i++){
        sum+= src[i-stride];        /* top row */
    }

    /* unsigned splat of the mean: 255*0x01010101 overflows a signed int */
    dc= 0x01010101U*((sum + 16)>>5);

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1634

    
1635
/**
 * 16x16 intra prediction, DC mode using only the left edge: fills the block
 * with the rounded mean of the 16 samples to its left.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0; i<16; i++){
        sum+= src[-1+i*stride];
    }

    /* unsigned splat of the mean: 255*0x01010101 overflows a signed int */
    dc= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1651

    
1652
/**
 * 16x16 intra prediction, DC mode using only the top edge: fills the block
 * with the rounded mean of the 16 samples above it.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0; i<16; i++){
        sum+= src[i-stride];
    }

    /* unsigned splat of the mean: 255*0x01010101 overflows a signed int */
    dc= 0x01010101U*((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1667

    
1668
/**
 * 16x16 intra prediction, DC mode without any neighbours: fills the block
 * with the constant 128.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t dc= 0x01010101U*128U;
    int i;

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1678

    
1679
static void pred16x16_plane_c(uint8_t *src, int stride){
1680
  int i, j, k;
1681
  int a;
1682
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1683
  const uint8_t * const src0 = src+7-stride;
1684
  const uint8_t *src1 = src+8*stride-1;
1685
  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
1686
  int H = src0[1] - src0[-1];
1687
  int V = src1[0] - src2[ 0];
1688
  for(k=2; k<=8; ++k) {
1689
    src1 += stride; src2 -= stride;
1690
    H += k*(src0[k] - src0[-k]);
1691
    V += k*(src1[0] - src2[ 0]);
1692
  }
1693
  H = ( 5*H+32 ) >> 6;
1694
  V = ( 5*V+32 ) >> 6;
1695

    
1696
  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
1697
  for(j=16; j>0; --j) {
1698
    int b = a;
1699
    a += V;
1700
    for(i=-16; i<0; i+=4) {
1701
      src[16+i] = cm[ (b    ) >> 5 ];
1702
      src[17+i] = cm[ (b+  H) >> 5 ];
1703
      src[18+i] = cm[ (b+2*H) >> 5 ];
1704
      src[19+i] = cm[ (b+3*H) >> 5 ];
1705
      b += 4*H;
1706
    }
1707
    src += stride;
1708
  }
1709
}
1710

    
1711
/**
 * 8x8 intra prediction, vertical mode: copies the 8 samples of the row above
 * the block into each of the 8 rows of the block.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t * const top= (const uint32_t*)(src-stride);
    const uint32_t a= top[0];
    const uint32_t b= top[1];
    int i;

    for(i=0; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]= a;
        row[1]= b;
    }
}
1721

    
1722
/**
 * 8x8 intra prediction, horizontal mode: every row of the block is filled
 * with the sample immediately to the left of that row.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);
        /* unsigned multiply: 255*0x01010101 does not fit into a signed int */
        const uint32_t v= src[-1+i*stride]*0x01010101U;

        row[0]=
        row[1]= v;
    }
}
1730

    
1731
/**
 * 8x8 intra prediction, DC mode without any neighbours: fills the block with
 * the constant 128.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t dc= 0x01010101U*128U;
    int i;

    for(i=0; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]= dc;
    }
}
1743

    
1744
/**
 * 8x8 intra prediction, DC mode using only the left edge.
 * The upper four rows are filled with the rounded mean of the upper four left
 * neighbours, the lower four rows with that of the lower four.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum0=0, sum2=0;
    uint32_t dc0, dc2;

    for(i=0; i<4; i++){
        sum0+= src[-1+i*stride];
        sum2+= src[-1+(i+4)*stride];
    }
    /* unsigned splat of the means: 255*0x01010101 overflows a signed int */
    dc0= 0x01010101U*((sum0 + 2)>>2);
    dc2= 0x01010101U*((sum2 + 2)>>2);

    for(i=0; i<4; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]= dc0;
    }
    for(i=4; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]= dc2;
    }
}
1765

    
1766
/**
 * 8x8 intra prediction, DC mode using only the top edge.
 * The left four columns are filled with the rounded mean of the left four top
 * neighbours, the right four columns with that of the right four.
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum0=0, sum1=0;
    uint32_t dc0, dc1;

    for(i=0; i<4; i++){
        sum0+= src[i-stride];
        sum1+= src[4+i-stride];
    }
    /* unsigned splat of the means: 255*0x01010101 overflows a signed int */
    dc0= 0x01010101U*((sum0 + 2)>>2);
    dc1= 0x01010101U*((sum1 + 2)>>2);

    /* all eight rows receive the same pair of words */
    for(i=0; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]= dc0;
        row[1]= dc1;
    }
}
1787

    
1788

    
1789
/**
 * 8x8 intra prediction, DC mode with both edges available.
 * Each 4x4 quadrant gets its own DC value:
 *  - top-left:     mean of the 4 left and 4 top neighbours of that quadrant
 *  - top-right:    mean of the 4 top neighbours above it
 *  - bottom-left:  mean of the 4 left neighbours beside it
 *  - bottom-right: mean of the top-right top and bottom-left left neighbours
 * @param src    top-left sample of the block to predict
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum0=0, sum1=0, sum2=0;
    uint32_t dc0, dc1, dc2, dc3;

    for(i=0; i<4; i++){
        sum0+= src[-1+i*stride] + src[i-stride];
        sum1+= src[4+i-stride];
        sum2+= src[-1+(i+4)*stride];
    }
    /* unsigned splat of the means: 255*0x01010101 overflows a signed int */
    dc3= 0x01010101U*((sum1 + sum2 + 4)>>3);
    dc0= 0x01010101U*((sum0 + 4)>>3);
    dc1= 0x01010101U*((sum1 + 2)>>2);
    dc2= 0x01010101U*((sum2 + 2)>>2);

    for(i=0; i<4; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]= dc0;
        row[1]= dc1;
    }
    for(i=4; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]= dc2;
        row[1]= dc3;
    }
}
1813

    
1814
static void pred8x8_plane_c(uint8_t *src, int stride){
1815
  int j, k;
1816
  int a;
1817
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1818
  const uint8_t * const src0 = src+3-stride;
1819
  const uint8_t *src1 = src+4*stride-1;
1820
  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
1821
  int H = src0[1] - src0[-1];
1822
  int V = src1[0] - src2[ 0];
1823
  for(k=2; k<=4; ++k) {
1824
    src1 += stride; src2 -= stride;
1825
    H += k*(src0[k] - src0[-k]);
1826
    V += k*(src1[0] - src2[ 0]);
1827
  }
1828
  H = ( 17*H+16 ) >> 5;
1829
  V = ( 17*V+16 ) >> 5;
1830

    
1831
  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
1832
  for(j=8; j>0; --j) {
1833
    int b = a;
1834
    a += V;
1835
    src[0] = cm[ (b    ) >> 5 ];
1836
    src[1] = cm[ (b+  H) >> 5 ];
1837
    src[2] = cm[ (b+2*H) >> 5 ];
1838
    src[3] = cm[ (b+3*H) >> 5 ];
1839
    src[4] = cm[ (b+4*H) >> 5 ];
1840
    src[5] = cm[ (b+5*H) >> 5 ];
1841
    src[6] = cm[ (b+6*H) >> 5 ];
1842
    src[7] = cm[ (b+7*H) >> 5 ];
1843
    src += stride;
1844
  }
1845
}
1846

    
1847
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1848
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1849
                           int src_x_offset, int src_y_offset,
1850
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1851
    MpegEncContext * const s = &h->s;
1852
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1853
    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1854
    const int luma_xy= (mx&3) + ((my&3)<<2);
1855
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
1856
    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
1857
    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
1858
    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
1859
    int extra_height= extra_width;
1860
    int emu=0;
1861
    const int full_mx= mx>>2;
1862
    const int full_my= my>>2;
1863
    
1864
    assert(pic->data[0]);
1865
    
1866
    if(mx&7) extra_width -= 3;
1867
    if(my&7) extra_height -= 3;
1868
    
1869
    if(   full_mx < 0-extra_width 
1870
       || full_my < 0-extra_height 
1871
       || full_mx + 16/*FIXME*/ > s->width + extra_width 
1872
       || full_my + 16/*FIXME*/ > s->height + extra_height){
1873
        ff_emulated_edge_mc(s, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
1874
            src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
1875
        emu=1;
1876
    }
1877
    
1878
    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
1879
    if(!square){
1880
        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
1881
    }
1882
    
1883
    if(s->flags&CODEC_FLAG_GRAY) return;
1884
    
1885
    if(emu){
1886
        ff_emulated_edge_mc(s, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1887
            src_cb= s->edge_emu_buffer;
1888
    }
1889
    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
1890

    
1891
    if(emu){
1892
        ff_emulated_edge_mc(s, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1893
            src_cr= s->edge_emu_buffer;
1894
    }
1895
    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
1896
}
1897

    
1898
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1899
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1900
                           int x_offset, int y_offset,
1901
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1902
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1903
                           int list0, int list1){
1904
    MpegEncContext * const s = &h->s;
1905
    qpel_mc_func *qpix_op=  qpix_put;
1906
    h264_chroma_mc_func chroma_op= chroma_put;
1907
    
1908
    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
1909
    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
1910
    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
1911
    x_offset += 8*s->mb_x;
1912
    y_offset += 8*s->mb_y;
1913
    
1914
    if(list0){
1915
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1916
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1917
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1918
                           qpix_op, chroma_op);
1919

    
1920
        qpix_op=  qpix_avg;
1921
        chroma_op= chroma_avg;
1922
    }
1923

    
1924
    if(list1){
1925
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1926
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1927
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1928
                           qpix_op, chroma_op);
1929
    }
1930
}
1931

    
1932
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1933
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1934
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
1935
    MpegEncContext * const s = &h->s;
1936
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1937
    const int mb_type= s->current_picture.mb_type[mb_xy];
1938
    
1939
    assert(IS_INTER(mb_type));
1940
    
1941
    if(IS_16X16(mb_type)){
1942
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1943
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1944
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1945
    }else if(IS_16X8(mb_type)){
1946
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1947
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1948
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1949
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1950
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1951
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1952
    }else if(IS_8X16(mb_type)){
1953
        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
1954
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1955
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1956
        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
1957
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1958
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1959
    }else{
1960
        int i;
1961
        
1962
        assert(IS_8X8(mb_type));
1963

    
1964
        for(i=0; i<4; i++){
1965
            const int sub_mb_type= h->sub_mb_type[i];
1966
            const int n= 4*i;
1967
            int x_offset= (i&1)<<2;
1968
            int y_offset= (i&2)<<1;
1969

    
1970
            if(IS_SUB_8X8(sub_mb_type)){
1971
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1972
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1973
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1974
            }else if(IS_SUB_8X4(sub_mb_type)){
1975
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1976
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1977
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1978
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1979
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1980
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1981
            }else if(IS_SUB_4X8(sub_mb_type)){
1982
                mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1983
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1984
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1985
                mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1986
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1987
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1988
            }else{
1989
                int j;
1990
                assert(IS_SUB_4X4(sub_mb_type));
1991
                for(j=0; j<4; j++){
1992
                    int sub_x_offset= x_offset + 2*(j&1);
1993
                    int sub_y_offset= y_offset +   (j&2);
1994
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1995
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1996
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1997
                }
1998
            }
1999
        }
2000
    }
2001
}
2002

    
2003
/**
 * Initializes the static VLC tables used for coefficient decoding.
 * The work is done only on the first call; later calls return immediately.
 * @param h unused
 */
static void decode_init_vlc(H264Context *h){
    static int done = 0;

    if (!done) {
        int i;
        done = 1;

        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1);

        for(i=0; i<4; i++){
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1);
        }

        for(i=0; i<3; i++){
            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1);
        }
        for(i=0; i<15; i++){
            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1);
        }

        for(i=0; i<6; i++){
            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1);
        }
        /* the seventh run table has 16 entries and its own, larger VLC */
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1);
    }
}
2041

    
2042
/**
2043
 * Sets the intra prediction function pointers.
2044
 */
2045
static void init_pred_ptrs(H264Context *h){
2046
//    MpegEncContext * const s = &h->s;
2047

    
2048
    h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
2049
    h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
2050
    h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
2051
    h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2052
    h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2053
    h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
2054
    h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
2055
    h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
2056
    h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
2057
    h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
2058
    h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
2059
    h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
2060

    
2061
    h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
2062
    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
2063
    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
2064
    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
2065
    h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2066
    h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2067
    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2068

    
2069
    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
2070
    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
2071
    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
2072
    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
2073
    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2074
    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2075
    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2076
}
2077

    
2078
//FIXME factorize
/**
 * Allocates size zeroed bytes into p; on failure reports the error with
 * perror() and jumps to the label "fail" of the enclosing function.
 * Kept as a plain brace block (not do/while) because existing call sites
 * invoke it without a trailing semicolon.
 */
#define CHECKED_ALLOCZ(p, size)\
{\
    p= av_mallocz((size));\
    if(p==NULL){\
        perror("malloc");\
        goto fail;\
    }\
}
2087

    
2088
/**
 * Frees the tables allocated by alloc_tables() and resets the pointers.
 * Safe to call on partially allocated state: av_freep() is applied to each
 * table pointer and the derived slice_table pointer is cleared.
 */
static void free_tables(H264Context *h){
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;   /* pointed into slice_table_base */

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);
}
2097

    
2098
/**
2099
 * allocates tables.
2100
 * needs widzh/height
2101
 */
2102
static int alloc_tables(H264Context *h){
2103
    MpegEncContext * const s = &h->s;
2104
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2105
    int x,y;
2106

    
2107
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2108
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2109
    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
2110

    
2111
    memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
2112
    h->slice_table= h->slice_table_base + s->mb_stride + 1;
2113

    
2114
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint16_t));
2115
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
2116
    for(y=0; y<s->mb_height; y++){
2117
        for(x=0; x<s->mb_width; x++){
2118
            const int mb_xy= x + y*s->mb_stride;
2119
            const int b_xy = 4*x + 4*y*h->b_stride;
2120
            const int b8_xy= 2*x + 2*y*h->b8_stride;
2121
        
2122
            h->mb2b_xy [mb_xy]= b_xy;
2123
            h->mb2b8_xy[mb_xy]= b8_xy;
2124
        }
2125
    }
2126
    
2127
    return 0;
2128
fail:
2129
    free_tables(h);
2130
    return -1;
2131
}
2132

    
2133
/**
 * Initialization shared by the codec entry points: copies width, height and
 * codec id from the AVCodecContext and installs the intra prediction
 * functions. s->avctx must be set before calling.
 */
static void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    init_pred_ptrs(h);

    s->decode=1; //FIXME
}
2144

    
2145
/**
 * Decoder init callback: binds the context to avctx, applies the common
 * initialization, sets decoder defaults and builds the VLC tables.
 * @return always 0
 */
static int decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    s->avctx = avctx;
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
    s->progressive_sequence=1;
//    s->decode_mb= ff_h263_decode_mb;
    s->low_delay= 1;
    avctx->pix_fmt= PIX_FMT_YUV420P;

    decode_init_vlc(h);

    return 0;
}
2165

    
2166
/**
 * Prepares decoding of a new frame: starts the frame in the MPV and error
 * resilience layers, resets the MMCO index and recomputes the per-block
 * destination offsets, which depend on the line sizes of the new picture.
 */
static void frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    MPV_frame_start(s, s->avctx);
    ff_er_frame_start(s);
    h->mmco_index=0;

    assert(s->linesize && s->uvlinesize);

    /* scan8[i]-scan8[0] encodes the block position: low 3 bits = column, rest = row */
    for(i=0; i<16; i++){
        const int pos= scan8[i] - scan8[0];

        h->block_offset[i]= 4*(pos&7) + 4*s->linesize*(pos>>3);
        h->chroma_subblock_offset[i]= 2*(pos&7) + 2*s->uvlinesize*(pos>>3);
    }
    /* the Cb (16..19) and Cr (20..23) blocks share the same offsets */
    for(i=0; i<4; i++){
        const int pos= scan8[i] - scan8[0];

        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*(pos&7) + 4*s->uvlinesize*(pos>>3);
    }

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
}
2187

    
2188
/**
 * Reconstructs the current macroblock into the current picture:
 * intra or inter prediction first, then the residual is added.
 * Does nothing if s->decode is not set.
 */
static void hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;

    if(!s->decode)
        return;

    if(s->mb_skiped){
        /* no special handling of skipped macroblocks here */
    }

    dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

    if (h->mb_field_decoding_flag) {
        linesize = s->linesize * 2;
        uvlinesize = s->uvlinesize * 2;
        if(mb_y&1){ //FIXME move out of this func?
            /* rewind to the second line of the macroblock above; the chroma
               planes must be stepped with their own line size */
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
    } else {
        linesize = s->linesize;
        uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if(IS_INTRA(mb_type)){
        if(!(s->flags&CODEC_FLAG_GRAY)){
            h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
            h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
        }

        if(IS_INTRA4x4(mb_type)){
            if(!s->encoding){
                /* predict and reconstruct block by block, as later blocks
                   predict from the reconstruction of earlier ones */
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    uint8_t *topright= ptr + 4 - linesize;
                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    uint32_t tr;

                    if(!topright_avail){
                        /* replicate the last top sample; unsigned multiply as
                           255*0x01010101 overflows a signed int */
                        tr= ptr[3 - linesize]*0x01010101U;
                        topright= (uint8_t*) &tr;
                    }

                    h->pred4x4[ dir ](ptr, topright, linesize);
                    if(h->non_zero_count_cache[ scan8[i] ])
                        h264_add_idct_c(ptr, h->mb + i*16, linesize);
                }
            }
        }else{
            h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
            h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
        }
    }else{
        hl_motion(h, dest_y, dest_cb, dest_cr,
                  s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
                  s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
    }

    /* luma residual; intra 4x4 blocks already got theirs added above */
    if(!IS_INTRA4x4(mb_type)){
        for(i=0; i<16; i++){
            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                uint8_t * const ptr= dest_y + h->block_offset[i];
                h264_add_idct_c(ptr, h->mb + i*16, linesize);
            }
        }
    }

    /* chroma residual: DC blocks first, then the four Cb and four Cr blocks */
    if(!(s->flags&CODEC_FLAG_GRAY)){
        chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
        for(i=16; i<16+4; i++){
            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                uint8_t * const ptr= dest_cb + h->block_offset[i];
                h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
            }
        }
        for(i=20; i<20+4; i++){
            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                uint8_t * const ptr= dest_cr + h->block_offset[i];
                h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
            }
        }
    }
}
2284

    
2285
/**
 * Placeholder for CABAC macroblock decoding; currently does nothing.
 */
static void decode_mb_cabac(H264Context *h){
//    MpegEncContext * const s = &h->s;
}
2288

    
2289
/**
2290
 * fills the default_ref_list.
2291
 */
2292
static int fill_default_ref_list(H264Context *h){
2293
    MpegEncContext * const s = &h->s;
2294
    int i;
2295
    Picture sorted_short_ref[16];
2296
    
2297
    if(h->slice_type==B_TYPE){
2298
        int out_i;
2299
        int limit= -1;
2300

    
2301
        for(out_i=0; out_i<h->short_ref_count; out_i++){
2302
            int best_i=-1;
2303
            int best_poc=-1;
2304

    
2305
            for(i=0; i<h->short_ref_count; i++){
2306
                const int poc= h->short_ref[i]->poc;
2307
                if(poc > limit && poc < best_poc){
2308
                    best_poc= poc;
2309
                    best_i= i;
2310
                }
2311
            }
2312
            
2313
            assert(best_i != -1);
2314
            
2315
            limit= best_poc;
2316
            sorted_short_ref[out_i]= *h->short_ref[best_i];
2317
        }
2318
    }
2319

    
2320
    if(s->picture_structure == PICT_FRAME){
2321
        if(h->slice_type==B_TYPE){
2322
            const int current_poc= s->current_picture_ptr->poc;
2323
            int list;
2324

    
2325
            for(list=0; list<2; list++){
2326
                int index=0;
2327

    
2328
                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){
2329
                    const int i2= list ? h->short_ref_count - i - 1 : i;
2330
                    const int poc= sorted_short_ref[i2].poc;
2331
                    
2332
                    if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit
2333

    
2334
                    if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){
2335
                        h->default_ref_list[list][index  ]= sorted_short_ref[i2];
2336
                        h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num;
2337
                    }
2338
                }
2339

    
2340
                for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){
2341
                    if(h->long_ref[i]->reference != 3) continue;
2342

    
2343
                    h->default_ref_list[ list ][index  ]= *h->long_ref[i];
2344
                    h->default_ref_list[ list ][index++].pic_id= i;;
2345
                }
2346
                
2347
                if(h->long_ref_count > 1 && h->short_ref_count==0){
2348
                    Picture temp= h->default_ref_list[1][0];
2349
                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
2350
                    h->default_ref_list[1][0] = temp;
2351
                }
2352

    
2353
                if(index < h->ref_count[ list ])
2354
                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
2355
            }
2356
        }else{
2357
            int index=0;
2358
            for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
2359
                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
2360
                h->default_ref_list[0][index  ]= *h->short_ref[i];
2361
                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2362
            }
2363
            for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){
2364
                if(h->long_ref[i]->reference != 3) continue;
2365
                h->default_ref_list[0][index  ]= *h->long_ref[i];
2366
                h->default_ref_list[0][index++].pic_id= i;;
2367
            }
2368
            if(index < h->ref_count[0])
2369
                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2370
        }
2371
    }else{ //FIELD
2372
        if(h->slice_type==B_TYPE){
2373
        }else{
2374
            //FIXME second field balh
2375
        }
2376
    }
2377
    return 0;
2378
}
2379

    
2380
/**
 * Parses the reference picture list reordering commands of a slice header.
 * For every list in use, h->ref_list[list] is first initialised from
 * h->default_ref_list[list] and then permuted as instructed by the bitstream.
 * Only list 0 is processed unless the slice is a B slice; I and SI slices
 * return immediately without reading anything.
 * @return 0 on success, -1 if an invalid reordering command is encountered
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list;

    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func

    for(list=0; list<2; list++){
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        if(get_bits1(&s->gb)){ // reordering commands present for this list
            int pred= h->curr_pic_num;
            int index;

            for(index=0; ; index++){
                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                int pic_id;
                int i;

                /* The terminator has to be tested before the overflow check:
                 * a list that reorders all ref_count entries is followed by
                 * the terminator at index == ref_count, which is no overflow. */
                if(reordering_of_pic_nums_idc==3)
                    break;

                if(index >= h->ref_count[list]){
                    fprintf(stderr, "reference count overflow\n");
                    return -1;
                }

                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        // short term picture, addressed by a difference to the predictor
                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;

                        if(abs_diff_pic_num >= h->max_pic_num){
                            fprintf(stderr, "abs_diff_pic_num overflow\n");
                            return -1;
                        }

                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else                                pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1;

                        // search the not yet reordered tail of the list
                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0)
                                break;
                        }
                    }else{
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx

                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1)
                                break;
                        }
                    }

                    if(i < index){
                        fprintf(stderr, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    }else if(i > index){
                        // move entry i to position index, shifting the ones in between down by one
                        Picture tmp= h->ref_list[list][i];
                        for(; i>index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        }
                        h->ref_list[list][index]= tmp;
                    }
                }else{
                    fprintf(stderr, "illegal reordering_of_pic_nums_idc\n");
                    return -1;
                }
            }
        }

        if(h->slice_type!=B_TYPE) break;
    }
    return 0;
}
2453

    
2454
/**
 * Parses the prediction weight table of a slice header.
 * Reads both log2 weight denominators and then, for every active reference
 * of list 0 (and of list 1 for B slices), the optional explicit luma and
 * chroma weights and offsets. Entries whose flag is not set are filled with
 * the default weight 1<<denominator and offset 0 so that no values from a
 * previously parsed slice survive.
 * @return 0 on success, -1 if a denominator is outside the 0..7 range
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int luma_denom  = get_ue_golomb(&s->gb);
    const int chroma_denom= get_ue_golomb(&s->gb);
    int list, i;
    int luma_def, chroma_def;

    h->luma_log2_weight_denom  = luma_denom;
    h->chroma_log2_weight_denom= chroma_denom;

    // the denominators are used as shift counts below, so they must be bounded
    if(luma_denom < 0 || luma_denom > 7 || chroma_denom < 0 || chroma_denom > 7){
        fprintf(stderr, "log2_weight_denom out of range (%d %d)\n", luma_denom, chroma_denom);
        return -1;
    }

    luma_def  = 1<<luma_denom;
    chroma_def= 1<<chroma_denom;

    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;
            int j;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[list][i]= get_se_golomb(&s->gb);
                h->luma_offset[list][i]= get_se_golomb(&s->gb);
            }else{
                h->luma_weight[list][i]= luma_def;
                h->luma_offset[list][i]= 0;
            }

            chroma_weight_flag= get_bits1(&s->gb);
            for(j=0; j<2; j++){
                if(chroma_weight_flag){
                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
                }else{
                    h->chroma_weight[list][i][j]= chroma_def;
                    h->chroma_offset[list][i][j]= 0;
                }
            }
        }
        if(h->slice_type != B_TYPE) break;
    }
    return 0;
}
2484

    
2485
/**
2486
 * instantaneos decoder refresh.
2487
 */
2488
static void idr(H264Context *h){
2489
    int i;
2490

    
2491
    for(i=0; i<h->long_ref_count; i++){
2492
        h->long_ref[i]->reference=0;
2493
        h->long_ref[i]= NULL;
2494
    }
2495
    h->long_ref_count=0;
2496

    
2497
    for(i=0; i<h->short_ref_count; i++){
2498
        h->short_ref[i]->reference=0;
2499
        h->short_ref[i]= NULL;
2500
    }
2501
    h->short_ref_count=0;
2502
}
2503

    
2504
/**
2505
 *
2506
 * @return the removed picture or NULL if an error occures
2507
 */
2508
static Picture * remove_short(H264Context *h, int frame_num){
2509
    MpegEncContext * const s = &h->s;
2510
    int i;
2511
    
2512
    if(s->avctx->debug&FF_DEBUG_MMCO)
2513
        printf("remove short %d count %d\n", frame_num, h->short_ref_count);
2514
    
2515
    for(i=0; i<h->short_ref_count; i++){
2516
        Picture *pic= h->short_ref[i];
2517
        if(s->avctx->debug&FF_DEBUG_MMCO)
2518
            printf("%d %d %X\n", i, pic->frame_num, (int)pic);
2519
        if(pic->frame_num == frame_num){
2520
            h->short_ref[i]= NULL;
2521
            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
2522
            h->short_ref_count--;
2523
            return pic;
2524
        }
2525
    }
2526
    return NULL;
2527
}
2528

    
2529
/**
2530
 *
2531
 * @return the removed picture or NULL if an error occures
2532
 */
2533
static Picture * remove_long(H264Context *h, int i){
2534
    Picture *pic;
2535

    
2536
    if(i >= h->long_ref_count) return NULL;
2537
    pic= h->long_ref[i];
2538
    if(pic==NULL) return NULL;
2539
    
2540
    h->long_ref[i]= NULL;
2541
    memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*));
2542
    h->long_ref_count--;
2543

    
2544
    return pic;
2545
}
2546

    
2547
/**
2548
 * Executes the reference picture marking (memory management control operations).
2549
 */
2550
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
2551
    MpegEncContext * const s = &h->s;
2552
    int i;
2553
    int current_is_long=0;
2554
    Picture *pic;
2555
    
2556
    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
2557
        printf("no mmco here\n");
2558
        
2559
    for(i=0; i<mmco_count; i++){
2560
        if(s->avctx->debug&FF_DEBUG_MMCO)
2561
            printf("mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
2562

    
2563
        switch(mmco[i].opcode){
2564
        case MMCO_SHORT2UNUSED:
2565
            pic= remove_short(h, mmco[i].short_frame_num);
2566
            if(pic==NULL) return -1;
2567
            pic->reference= 0;
2568
            break;
2569
        case MMCO_SHORT2LONG:
2570
            pic= remove_long(h, mmco[i].long_index);
2571
            if(pic) pic->reference=0;
2572
            
2573
            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
2574
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2575
            break;
2576
        case MMCO_LONG2UNUSED:
2577
            pic= remove_long(h, mmco[i].long_index);
2578
            if(pic==NULL) return -1;
2579
            pic->reference= 0;
2580
            break;
2581
        case MMCO_LONG:
2582
            pic= remove_long(h, mmco[i].long_index);
2583
            if(pic) pic->reference=0;
2584
            
2585
            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
2586
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2587
            h->long_ref_count++;
2588
            
2589
            current_is_long=1;
2590
            break;
2591
        case MMCO_SET_MAX_LONG:
2592
            assert(mmco[i].long_index <= 16);
2593
            while(mmco[i].long_index < h->long_ref_count){
2594
                pic= remove_long(h, mmco[i].long_index);
2595
                pic->reference=0;
2596
            }
2597
            while(mmco[i].long_index > h->long_ref_count){
2598
                h->long_ref[ h->long_ref_count++ ]= NULL;
2599
            }
2600
            break;
2601
        case MMCO_RESET:
2602
            while(h->short_ref_count){
2603
                pic= remove_short(h, h->short_ref[0]->frame_num);
2604
                pic->reference=0;
2605
            }
2606
            while(h->long_ref_count){
2607
                pic= remove_long(h, h->long_ref_count-1);
2608
                pic->reference=0;
2609
            }
2610
            break;
2611
        default: assert(0);
2612
        }
2613
    }
2614
    
2615
    if(!current_is_long){
2616
        pic= remove_short(h, s->current_picture_ptr->frame_num);
2617
        if(pic){
2618
            pic->reference=0;
2619
            fprintf(stderr, "illegal short term buffer state detected\n");
2620
        }
2621
        
2622
        if(h->short_ref_count)
2623
            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
2624

    
2625
        h->short_ref[0]= s->current_picture_ptr;
2626
        h->short_ref[0]->long_ref=0;
2627
        h->short_ref_count++;
2628
    }
2629
    
2630
    return 0; 
2631
}
2632

    
2633
/**
 * Parses the decoded reference picture marking part of a slice header and
 * stores the resulting operations in h->mmco[0 .. h->mmco_index-1].
 * IDR slices yield at most one MMCO_LONG operation. Other slices either
 * carry an explicit operation list, or get a single MMCO_SHORT2UNUSED for
 * the last short term reference once the reference buffer is full.
 * @return 0 on success, -1 if an illegal operation or long term index is read
 */
static int decode_ref_pic_marking(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
        s->broken_link= get_bits1(&s->gb) -1;
        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
        if(h->mmco[0].long_index == -1)
            h->mmco_index= 0;
        else{
            h->mmco[0].opcode= MMCO_LONG;
            h->mmco_index= 1;
        }
    }else{
        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
            // always start at 0: h->mmco_index still holds the count of the previously parsed header
            for(i= 0; i<MAX_MMCO_COUNT; i++) {
                MMCOOpcode opcode= get_ue_golomb(&s->gb);

                h->mmco[i].opcode= opcode;

                // memory_management_control_operation 0 terminates the list and is not executed
                if(opcode == 0)
                    break;

                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                    //FIXME the resulting frame_num is not checked against the short term list here
                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
                }
                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
                    if(h->mmco[i].long_index >= 16){
                        fprintf(stderr, "illegal long ref in memory management control operation %d\n", opcode);
                        return -1;
                    }
                }

                if(opcode > MMCO_LONG){
                    fprintf(stderr, "illegal memory management control operation %d\n", opcode);
                    return -1;
                }
            }
            h->mmco_index= i;
        }else{
            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);

            // sliding window: drop the last short term reference once the buffer is full;
            // without any short term reference there is nothing that could be dropped
            if(h->short_ref_count > 0 && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
                h->mmco_index= 1;
            }else
                h->mmco_index= 0;
        }
    }

    return 0;
}
2687

    
2688
/**
 * Computes the picture order count of the current picture.
 * Updates h->frame_num_offset (and h->poc_msb for poc_type 0) and stores
 * the resulting field order counts, as well as their minimum for frame
 * pictures, in the current picture.
 * @return always 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int frame_num_wrap= 1<<h->sps.log2_max_frame_num;
    int top_poc, bottom_poc;

    // frame_num_offset grows by one wrap period every time frame_num wraps around
    if(h->nal_unit_type == NAL_IDR_SLICE)
        h->frame_num_offset= 0;
    else if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset= h->prev_frame_num_offset + frame_num_wrap;
    else
        h->frame_num_offset= h->prev_frame_num_offset;

    switch(h->sps.poc_type){
    case 0:{
        // explicit lsb in the slice header, msb tracked across pictures
        const int lsb_wrap= 1<<h->sps.log2_max_poc_lsb;

        h->poc_msb = h->prev_poc_msb;
        if(h->poc_lsb < h->prev_poc_lsb){
            if(h->prev_poc_lsb - h->poc_lsb >= lsb_wrap/2)
                h->poc_msb = h->prev_poc_msb + lsb_wrap;
        }else if(h->poc_lsb > h->prev_poc_lsb){
            if(h->prev_poc_lsb - h->poc_lsb < -lsb_wrap/2)
                h->poc_msb = h->prev_poc_msb - lsb_wrap;
        }

        bottom_poc = h->poc_msb + h->poc_lsb;
        top_poc    = bottom_poc;
        if(s->picture_structure == PICT_FRAME)
            bottom_poc += h->delta_poc_bottom;
        break;
    }
    case 1:{
        // expected count derived from frame_num and the per cycle offsets of the SPS
        int abs_frame_num= 0;
        int cycle_delta= 0;
        int expected= 0;
        int k;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        for(k=0; k < h->sps.poc_cycle_length; k++)
            cycle_delta += h->sps.offset_for_ref_frame[ k ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            const int full_cycles  = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            const int last_in_cycle= (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expected = full_cycles * cycle_delta;
            for(k = 0; k <= last_in_cycle; k++)
                expected += h->sps.offset_for_ref_frame[ k ];
        }

        if(h->nal_ref_idc == 0)
            expected += h->sps.offset_for_non_ref_pic;

        top_poc    = expected + h->delta_poc[0];
        bottom_poc = top_poc + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            bottom_poc += h->delta_poc[1];
        break;
    }
    default:{
        // count follows decoding order directly
        int poc= 0;

        if(h->nal_unit_type != NAL_IDR_SLICE){
            poc= 2*(h->frame_num_offset + h->frame_num);
            if(!h->nal_ref_idc)
                poc--;
        }
        top_poc   = poc;
        bottom_poc= poc;
        break;
    }
    }

    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= top_poc;
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= bottom_poc;
    if(s->picture_structure == PICT_FRAME) // FIXME field pix?
        s->current_picture_ptr->poc= FFMIN(top_poc, bottom_poc);

    return 0;
}
2771

    
2772
/**
2773
 * decodes a slice header.
2774
 * this will allso call MPV_common_init() and frame_start() as needed
2775
 */
2776
static int decode_slice_header(H264Context *h){
2777
    MpegEncContext * const s = &h->s;
2778
    int first_mb_in_slice, pps_id;
2779
    int num_ref_idx_active_override_flag;
2780
    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
2781
    float new_aspect;
2782

    
2783
    s->current_picture.reference= h->nal_ref_idc != 0;
2784

    
2785
    first_mb_in_slice= get_ue_golomb(&s->gb);
2786

    
2787
    h->slice_type= get_ue_golomb(&s->gb);
2788
    if(h->slice_type > 9){
2789
        fprintf(stderr, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
2790
    }
2791
    if(h->slice_type > 4){
2792
        h->slice_type -= 5;
2793
        h->slice_type_fixed=1;
2794
    }else
2795
        h->slice_type_fixed=0;
2796
    
2797
    h->slice_type= slice_type_map[ h->slice_type ];
2798
    
2799
    s->pict_type= h->slice_type; // to make a few old func happy, its wrong though
2800
        
2801
    pps_id= get_ue_golomb(&s->gb);
2802
    if(pps_id>255){
2803
        fprintf(stderr, "pps_id out of range\n");
2804
        return -1;
2805
    }
2806
    h->pps= h->pps_buffer[pps_id];
2807
    if(h->pps.slice_group_count == 0){
2808
        fprintf(stderr, "non existing PPS referenced\n");
2809
        return -1;
2810
    }
2811

    
2812
    h->sps= h->sps_buffer[ h->pps.sps_id ];
2813
    if(h->sps.log2_max_frame_num == 0){
2814
        fprintf(stderr, "non existing SPS referenced\n");
2815
        return -1;
2816
    }
2817
    
2818
    s->mb_width= h->sps.mb_width;
2819
    s->mb_height= h->sps.mb_height;
2820
    
2821
    h->b_stride=  s->mb_width*4;
2822
    h->b8_stride= s->mb_width*2;
2823

    
2824
    s->mb_x = first_mb_in_slice % s->mb_width;
2825
    s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
2826
    
2827
    s->width = 16*s->mb_width - 2*(h->pps.crop_left + h->pps.crop_right );
2828
    if(h->sps.frame_mbs_only_flag)
2829
        s->height= 16*s->mb_height - 2*(h->pps.crop_top  + h->pps.crop_bottom);
2830
    else
2831
        s->height= 16*s->mb_height - 4*(h->pps.crop_top  + h->pps.crop_bottom); //FIXME recheck
2832
    
2833
    if(h->pps.crop_left || h->pps.crop_top){
2834
        fprintf(stderr, "insane croping not completly supported, this could look slightly wrong ...\n");
2835
    }
2836

    
2837
    if(s->aspected_height) //FIXME emms at end of slice ?
2838
        new_aspect= h->sps.sar_width*s->width / (float)(s->height*h->sps.sar_height);
2839
    else
2840
        new_aspect=0;
2841

    
2842
    if (s->context_initialized 
2843
        && (   s->width != s->avctx->width || s->height != s->avctx->height 
2844
            || ABS(new_aspect - s->avctx->aspect_ratio) > 0.001)) {
2845
        free_tables(h);
2846
        MPV_common_end(s);
2847
    }
2848
    if (!s->context_initialized) {
2849
        if (MPV_common_init(s) < 0)
2850
            return -1;
2851

    
2852
        alloc_tables(h);
2853

    
2854
        s->avctx->width = s->width;
2855
        s->avctx->height = s->height;
2856
        s->avctx->aspect_ratio= new_aspect;
2857
    }
2858

    
2859
    if(first_mb_in_slice == 0){
2860
        frame_start(h);
2861
    }
2862

    
2863
    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
2864
    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
2865

    
2866
    if(h->sps.frame_mbs_only_flag){
2867
        s->picture_structure= PICT_FRAME;
2868
    }else{
2869
        if(get_bits1(&s->gb)) //field_pic_flag
2870
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
2871
        else
2872
            s->picture_structure= PICT_FRAME;
2873
    }
2874

    
2875
    if(s->picture_structure==PICT_FRAME){
2876
        h->curr_pic_num=   h->frame_num;
2877
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
2878
    }else{
2879
        h->curr_pic_num= 2*h->frame_num;
2880
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
2881
    }
2882
        
2883
    if(h->nal_unit_type == NAL_IDR_SLICE){
2884
        int idr_pic_id= get_ue_golomb(&s->gb);
2885
    }
2886
   
2887
    if(h->sps.poc_type==0){
2888
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
2889
        
2890
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
2891
            h->delta_poc_bottom= get_se_golomb(&s->gb);
2892
        }
2893
    }
2894
    
2895
    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
2896
        h->delta_poc[0]= get_se_golomb(&s->gb);
2897
        
2898
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
2899
            h->delta_poc[1]= get_se_golomb(&s->gb);
2900
    }
2901
    
2902
    init_poc(h);
2903
    
2904
    if(h->pps.redundant_pic_cnt_present){
2905
        h->redundant_pic_count= get_ue_golomb(&s->gb);
2906
    }
2907

    
2908
    //set defaults, might be overriden a few line later
2909
    h->ref_count[0]= h->pps.ref_count[0];
2910
    h->ref_count[1]= h->pps.ref_count[1];
2911

    
2912
    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
2913
        if(h->slice_type == B_TYPE){
2914
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
2915
        }
2916
        num_ref_idx_active_override_flag= get_bits1(&s->gb);
2917
    
2918
        if(num_ref_idx_active_override_flag){
2919
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
2920
            if(h->slice_type==B_TYPE)
2921
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2922

    
2923
            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
2924
                fprintf(stderr, "reference overflow\n");
2925
                return -1;
2926
            }
2927
        }
2928
    }
2929

    
2930
    if(first_mb_in_slice == 0){
2931
        fill_default_ref_list(h);
2932
    }
2933

    
2934
    decode_ref_pic_list_reordering(h);
2935

    
2936
    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) 
2937
       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
2938
        pred_weight_table(h);
2939
    
2940
    if(s->current_picture.reference)
2941
        decode_ref_pic_marking(h);
2942
    //FIXME CABAC stuff
2943

    
2944
    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); //slice_qp_delta
2945
    //FIXME qscale / qp ... stuff
2946
    if(h->slice_type == SP_TYPE){
2947
        int sp_for_switch_flag= get_bits1(&s->gb);
2948
    }
2949
    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
2950
        int slice_qs_delta= get_se_golomb(&s->gb);
2951
    }
2952

    
2953
    if( h->pps.deblocking_filter_parameters_present ) {
2954
        h->disable_deblocking_filter_idc= get_ue_golomb(&s->gb);
2955
        if( h->disable_deblocking_filter_idc  !=  1 ) {
2956
            h->slice_alpha_c0_offset_div2= get_se_golomb(&s->gb);
2957
            h->slice_beta_offset_div2= get_se_golomb(&s->gb);
2958
        }
2959
    }else
2960
        h->disable_deblocking_filter_idc= 0;
2961

    
2962
#if 0 //FMO
2963
    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2964
        slice_group_change_cycle= get_bits(&s->gb, ?);
2965
#endif
2966

    
2967
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
2968
        printf("mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
2969
               first_mb_in_slice, 
2970
               ff_get_pict_type_char(h->slice_type),
2971
               pps_id, h->frame_num,
2972
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2973
               h->ref_count[0], h->ref_count[1],
2974
               s->qscale,
2975
               h->disable_deblocking_filter_idc
2976
               );
2977
    }
2978

    
2979
    return 0;
2980
}
2981

    
2982
/**
2983
 *
2984
 */
2985
static inline int get_level_prefix(GetBitContext *gb){
2986
    unsigned int buf;
2987
    int log;
2988
    
2989
    OPEN_READER(re, gb);
2990
    UPDATE_CACHE(re, gb);
2991
    buf=GET_CACHE(re, gb);
2992
    
2993
    log= 32 - av_log2(buf);
2994
#ifdef TRACE
2995
    print_bin(buf>>(32-log), log);
2996
    printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
2997
#endif
2998

    
2999
    LAST_SKIP_BITS(re, gb, log);
3000
    CLOSE_READER(re, gb);
3001

    
3002
    return log-1;
3003
}
3004

    
3005
/**
3006
 * decodes a residual block.
3007
 * @param n block index
3008
 * @param scantable scantable
3009
 * @param max_coeff number of coefficients in the block
3010
 * @return <0 if an error occured
3011
 */
3012
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){
3013
    MpegEncContext * const s = &h->s;
3014
    const uint16_t *qmul= dequant_coeff[qp];
3015
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3016
    int level[16], run[16];
3017
    int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
3018

    
3019
    //FIXME put trailing_onex into the context
3020

    
3021
    if(n == CHROMA_DC_BLOCK_INDEX){
3022
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3023
        total_coeff= coeff_token>>2;
3024
    }else{    
3025
        if(n == LUMA_DC_BLOCK_INDEX){
3026
            total_coeff= pred_non_zero_count(h, 0);
3027
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3028
            total_coeff= coeff_token>>2;
3029
        }else{
3030
            total_coeff= pred_non_zero_count(h, n);
3031
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3032
            total_coeff= coeff_token>>2;
3033
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
3034
        }
3035
    }
3036

    
3037
    //FIXME set last_non_zero?
3038

    
3039
    if(total_coeff==0)
3040
        return 0;
3041
        
3042
    trailing_ones= coeff_token&3;
3043
    tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
3044
    assert(total_coeff<=16);
3045
    
3046
    for(i=0; i<trailing_ones; i++){
3047
        level[i]= 1 - 2*get_bits1(gb);
3048
    }
3049

    
3050
    suffix_length= total_coeff > 10 && trailing_ones < 3;
3051

    
3052
    for(; i<total_coeff; i++){
3053
        const int prefix= get_level_prefix(gb);
3054
        int level_code, mask;
3055

    
3056
        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
3057
            if(suffix_length)
3058
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3059
            else
3060
                level_code= (prefix<<suffix_length); //part
3061
        }else if(prefix==14){
3062
            if(suffix_length)
3063
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3064
            else
3065
                level_code= prefix + get_bits(gb, 4); //part
3066
        }else if(prefix==15){
3067
            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
3068
            if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
3069
        }else{
3070
            fprintf(stderr, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
3071
            return -1;
3072
        }
3073

    
3074
        if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
3075

    
3076
        mask= -(level_code&1);
3077
        level[i]= (((2+level_code)>>1) ^ mask) - mask;
3078

    
3079
        if(suffix_length==0) suffix_length=1; //FIXME split first iteration
3080

    
3081
#if 1
3082
        if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3083
#else        
3084
        if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3085
        ? == prefix > 2 or sth
3086
#endif
3087
        tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
3088
    }
3089

    
3090
    if(total_coeff == max_coeff)
3091
        zeros_left=0;
3092
    else{
3093
        if(n == CHROMA_DC_BLOCK_INDEX)
3094
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
3095
        else
3096
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
3097
    }
3098
    
3099
    for(i=0; i<total_coeff-1; i++){
3100
        if(zeros_left <=0)
3101
            break;
3102
        else if(zeros_left < 7){
3103
            run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
3104
        }else{
3105
            run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
3106
        }
3107
        zeros_left -= run[i];
3108
    }
3109

    
3110
    if(zeros_left<0){
3111
        fprintf(stderr, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
3112
        return -1;
3113
    }
3114
    
3115
    for(; i<total_coeff-1; i++){
3116
        run[i]= 0;
3117
    }
3118

    
3119
    run[i]= zeros_left;
3120

    
3121
    coeff_num=-1;
3122
    if(n > 24){
3123
        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
3124
            int j;
3125

    
3126
            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
3127
            j= scantable[ coeff_num ];
3128

    
3129
            block[j]= level[i];
3130
        }
3131
    }else{
3132
        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into  rundecode?
3133
            int j;
3134

    
3135
            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
3136
            j= scantable[ coeff_num ];
3137

    
3138
            block[j]= level[i] * qmul[j];
3139
//            printf("%d %d  ", block[j], qmul[j]);
3140
        }
3141
    }
3142
    return 0;
3143
}
3144

    
3145
/**
3146
 * decodes a macroblock
3147
 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
3148
 */
3149
static int decode_mb(H264Context *h){
3150
    MpegEncContext * const s = &h->s;
3151
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3152
    int mb_type, partition_count, cbp;
3153

    
3154
    memset(h->mb, 0, sizeof(int16_t)*24*16); //FIXME avoid if allready clear (move after skip handlong?
3155

    
3156
    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
3157

    
3158
    if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
3159
        if(s->mb_skip_run==-1)
3160
            s->mb_skip_run= get_ue_golomb(&s->gb);
3161
        
3162
        if (s->mb_skip_run--) {
3163
            int mx, my;
3164
            /* skip mb */
3165
//FIXME b frame
3166
            mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0;
3167

    
3168
            memset(h->non_zero_count[mb_xy], 0, 16);
3169
            memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
3170

    
3171
            if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){
3172
                h->mb_field_decoding_flag= get_bits1(&s->gb);
3173
            }
3174

    
3175
            if(h->mb_field_decoding_flag)
3176
                mb_type|= MB_TYPE_INTERLACED;
3177
            
3178
            fill_caches(h, mb_type); //FIXME check what is needed and what not ...
3179
            pred_pskip_motion(h, &mx, &my);
3180
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
3181
            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, (mx&0xFFFF)+(my<<16), 4);
3182
            write_back_motion(h, mb_type);
3183

    
3184
            s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type
3185
            h->slice_table[ mb_xy ]= h->slice_num;
3186

    
3187
            h->prev_mb_skiped= 1;
3188
            return 0;
3189
        }
3190
    }
3191
    if(h->sps.mb_aff /* && !field pic FIXME needed? */){
3192
        if((s->mb_y&1)==0)
3193
            h->mb_field_decoding_flag = get_bits1(&s->gb);
3194
    }else
3195
        h->mb_field_decoding_flag=0; //FIXME som ed note ?!
3196
    
3197
    h->prev_mb_skiped= 0;
3198
    
3199
    mb_type= get_ue_golomb(&s->gb);
3200
    if(h->slice_type == B_TYPE){
3201
        if(mb_type < 23){
3202
            partition_count= b_mb_type_info[mb_type].partition_count;
3203
            mb_type=         b_mb_type_info[mb_type].type;
3204
        }else{
3205
            mb_type -= 23;
3206
            goto decode_intra_mb;
3207
        }
3208
    }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
3209
        if(mb_type < 5){
3210
            partition_count= p_mb_type_info[mb_type].partition_count;
3211
            mb_type=         p_mb_type_info[mb_type].type;
3212
        }else{
3213
            mb_type -= 5;
3214
            goto decode_intra_mb;
3215
        }
3216
    }else{
3217
       assert(h->slice_type == I_TYPE);
3218
decode_intra_mb:
3219
        if(mb_type > 25){
3220
            fprintf(stderr, "mb_type %d in %c slice to large at %d %d\n", mb_type, ff_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
3221
            return -1;
3222
        }
3223
        partition_count=0;
3224
        cbp= i_mb_type_info[mb_type].cbp;
3225
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
3226
        mb_type= i_mb_type_info[mb_type].type;
3227
    }
3228

    
3229
    if(h->mb_field_decoding_flag)
3230
        mb_type |= MB_TYPE_INTERLACED;
3231

    
3232
    s->current_picture.mb_type[mb_xy]= mb_type;
3233
    h->slice_table[ mb_xy ]= h->slice_num;
3234
    
3235
    if(IS_INTRA_PCM(mb_type)){
3236
        const uint8_t *ptr;
3237
        int x, y;
3238
        
3239
        // we assume these blocks are very rare so we dont optimize it
3240
        align_get_bits(&s->gb);
3241
        
3242
        ptr= s->gb.buffer + get_bits_count(&s->gb);
3243
    
3244
        for(y=0; y<16; y++){
3245
            const int index= 4*(y&3) + 64*(y>>2);
3246
            for(x=0; x<16; x++){
3247
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3248
            }
3249
        }
3250
        for(y=0; y<8; y++){
3251
            const int index= 256 + 4*(y&3) + 32*(y>>2);
3252
            for(x=0; x<8; x++){
3253
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3254
            }
3255
        }
3256
        for(y=0; y<8; y++){
3257
            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
3258
            for(x=0; x<8; x++){
3259
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3260
            }
3261
        }
3262
    
3263
        skip_bits(&s->gb, 384); //FIXME check /fix the bitstream readers
3264
        
3265
        memset(h->non_zero_count[mb_xy], 16, 16);
3266
        
3267
        return 0;
3268
    }
3269
        
3270
    fill_caches(h, mb_type);
3271

    
3272
    //mb_pred
3273
    if(IS_INTRA(mb_type)){
3274
//            init_top_left_availability(h);
3275
            if(IS_INTRA4x4(mb_type)){
3276
                int i;
3277

    
3278
//                fill_intra4x4_pred_table(h);
3279
                for(i=0; i<16; i++){
3280
                    const int mode_coded= !get_bits1(&s->gb);
3281
                    const int predicted_mode=  pred_intra_mode(h, i);
3282
                    int mode;
3283

    
3284
                    if(mode_coded){
3285
                        const int rem_mode= get_bits(&s->gb, 3);
3286
                        if(rem_mode<predicted_mode)
3287
                            mode= rem_mode;
3288
                        else
3289
                            mode= rem_mode + 1;
3290
                    }else{
3291
                        mode= predicted_mode;
3292
                    }
3293
                    
3294
                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
3295
                }
3296
                write_back_intra_pred_mode(h);
3297
                if( check_intra4x4_pred_mode(h) < 0)
3298
                    return -1;
3299
            }else{
3300
                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
3301
                if(h->intra16x16_pred_mode < 0)
3302
                    return -1;
3303
            }
3304
            h->chroma_pred_mode= get_ue_golomb(&s->gb);
3305

    
3306
            h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
3307
            if(h->chroma_pred_mode < 0)
3308
                return -1;
3309
    }else if(partition_count==4){
3310
        int i, j, sub_partition_count[4], list, ref[2][4];
3311
        
3312
        if(h->slice_type == B_TYPE){
3313
            for(i=0; i<4; i++){
3314
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
3315
                if(h->sub_mb_type[i] >=13){
3316
                    fprintf(stderr, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
3317
                    return -1;
3318
                }
3319
                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
3320
                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
3321
            }
3322
        }else{
3323
            assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
3324
            for(i=0; i<4; i++){
3325
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
3326
                if(h->sub_mb_type[i] >=4){
3327
                    fprintf(stderr, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
3328
                    return -1;
3329
                }
3330
                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
3331
                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
3332
            }
3333
        }
3334
        
3335
        for(list=0; list<2; list++){
3336
            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
3337
            if(ref_count == 0) continue;
3338
            for(i=0; i<4; i++){
3339
                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
3340
                    ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
3341
                }else{
3342
                 //FIXME
3343
                    ref[list][i] = -1;
3344
                }
3345
            }
3346
        }
3347
        
3348
        for(list=0; list<2; list++){
3349
            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
3350
            if(ref_count == 0) continue;
3351

    
3352
            for(i=0; i<4; i++){
3353
                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
3354
                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
3355

    
3356
                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
3357
                    const int sub_mb_type= h->sub_mb_type[i];
3358
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
3359
                    for(j=0; j<sub_partition_count[i]; j++){
3360
                        int mx, my;
3361
                        const int index= 4*i + block_width*j;
3362
                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
3363
                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
3364
                        mx += get_se_golomb(&s->gb);
3365
                        my += get_se_golomb(&s->gb);
3366
                        tprintf("final mv:%d %d\n", mx, my);
3367

    
3368
                        if(IS_SUB_8X8(sub_mb_type)){
3369
                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= 
3370
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
3371
                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= 
3372
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
3373
                        }else if(IS_SUB_8X4(sub_mb_type)){
3374
                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
3375
                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
3376
                        }else if(IS_SUB_4X8(sub_mb_type)){
3377
                            mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
3378
                            mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
3379
                        }else{
3380
                            assert(IS_SUB_4X4(sub_mb_type));
3381
                            mv_cache[ 0 ][0]= mx;
3382
                            mv_cache[ 0 ][1]= my;
3383
                        }
3384
                    }
3385
                }else{
3386
                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
3387
                    p[0] = p[1]=
3388
                    p[8] = p[9]= 0;
3389
                }
3390
            }
3391
        }
3392
    }else if(!IS_DIRECT(mb_type)){
3393
        int list, mx, my, i;
3394
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
3395
        if(IS_16X16(mb_type)){
3396
            for(list=0; list<2; list++){
3397
                if(h->ref_count[0]>0){
3398
                    if(IS_DIR(mb_type, 0, list)){
3399
                        const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3400
                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
3401
                    }
3402
                }
3403
            }
3404
            for(list=0; list<2; list++){
3405
                if(IS_DIR(mb_type, 0, list)){
3406
                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
3407
                    mx += get_se_golomb(&s->gb);
3408
                    my += get_se_golomb(&s->gb);
3409
                    tprintf("final mv:%d %d\n", mx, my);
3410

    
3411
                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, (mx&0xFFFF) + (my<<16), 4);
3412
                }
3413
            }
3414
        }
3415
        else if(IS_16X8(mb_type)){
3416
            for(list=0; list<2; list++){
3417
                if(h->ref_count[list]>0){
3418
                    for(i=0; i<2; i++){
3419
                        if(IS_DIR(mb_type, i, list)){
3420
                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3421
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
3422
                        }
3423
                    }
3424
                }
3425
            }
3426
            for(list=0; list<2; list++){
3427
                for(i=0; i<2; i++){
3428
                    if(IS_DIR(mb_type, i, list)){
3429
                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
3430
                        mx += get_se_golomb(&s->gb);
3431
                        my += get_se_golomb(&s->gb);
3432
                        tprintf("final mv:%d %d\n", mx, my);
3433

    
3434
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (mx&0xFFFF) + (my<<16), 4);
3435
                    }
3436
                }
3437
            }
3438
        }else{
3439
            assert(IS_8X16(mb_type));
3440
            for(list=0; list<2; list++){
3441
                if(h->ref_count[list]>0){
3442
                    for(i=0; i<2; i++){
3443
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
3444
                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3445
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
3446
                        }
3447
                    }
3448
                }
3449
            }
3450
            for(list=0; list<2; list++){
3451
                for(i=0; i<2; i++){
3452
                    if(IS_DIR(mb_type, i, list)){
3453
                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
3454
                        mx += get_se_golomb(&s->gb);
3455
                        my += get_se_golomb(&s->gb);
3456
                        tprintf("final mv:%d %d\n", mx, my);
3457

    
3458
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (mx&0xFFFF) + (my<<16), 4);
3459
                    }
3460
                }
3461
            }
3462
        }
3463
    }
3464
    
3465
    if(IS_INTER(mb_type))
3466
        write_back_motion(h, mb_type);
3467
    
3468
    if(!IS_INTRA16x16(mb_type)){
3469
        cbp= get_ue_golomb(&s->gb);
3470
        if(cbp > 47){
3471
            fprintf(stderr, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
3472
            return -1;
3473
        }
3474
        
3475
        if(IS_INTRA4x4(mb_type))
3476
            cbp= golomb_to_intra4x4_cbp[cbp];
3477
        else
3478
            cbp= golomb_to_inter_cbp[cbp];
3479
    }
3480

    
3481
    if(cbp || IS_INTRA16x16(mb_type)){
3482
        int i8x8, i4x4, chroma_idx;
3483
        int chroma_qp, dquant;
3484
        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
3485
        const uint8_t *scan, *dc_scan;
3486
        
3487
//        fill_non_zero_count_cache(h);
3488

    
3489
        if(IS_INTERLACED(mb_type)){
3490
            scan= field_scan;
3491
            dc_scan= luma_dc_field_scan;
3492
        }else{
3493
            scan= zigzag_scan;
3494
            dc_scan= luma_dc_zigzag_scan;
3495
        }
3496

    
3497
        dquant= get_se_golomb(&s->gb);
3498

    
3499
        if( dquant > 25 || dquant < -26 ){
3500
            fprintf(stderr, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
3501
            return -1;
3502
        }
3503
        
3504
        s->qscale += dquant;
3505
        if(((unsigned)s->qscale) > 51){
3506
            if(s->qscale<0) s->qscale+= 52;
3507
            else            s->qscale-= 52;
3508
        }
3509
        
3510
        h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale);
3511
        if(IS_INTRA16x16(mb_type)){
3512
            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){
3513
                return -1; //FIXME continue if partotioned and other retirn -1 too
3514
            }
3515

    
3516
            assert((cbp&15) == 0 || (cbp&15) == 15);
3517

    
3518
            if(cbp&15){
3519
                for(i8x8=0; i8x8<4; i8x8++){
3520
                    for(i4x4=0; i4x4<4; i4x4++){
3521
                        const int index= i4x4 + 4*i8x8;
3522
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, s->qscale, 15) < 0 ){
3523
                            return -1;
3524
                        }
3525
                    }
3526
                }
3527
            }else{
3528
                memset(&h->non_zero_count_cache[8], 0, 8*4); //FIXME stupid & slow
3529
            }
3530
        }else{
3531
            for(i8x8=0; i8x8<4; i8x8++){
3532
                if(cbp & (1<<i8x8)){
3533
                    for(i4x4=0; i4x4<4; i4x4++){
3534
                        const int index= i4x4 + 4*i8x8;
3535
                        
3536
                        if( decode_residual(h, gb, h->mb + 16*index, index, scan, s->qscale, 16) <0 ){
3537
                            return -1;
3538
                        }
3539
                    }
3540
                }else{
3541
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
3542
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
3543
                }
3544
            }
3545
        }
3546
        
3547
        if(cbp&0x30){
3548
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
3549
                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, chroma_qp, 4) < 0){
3550
                    return -1;
3551
                }
3552
        }
3553

    
3554
        if(cbp&0x20){
3555
            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
3556
                for(i4x4=0; i4x4<4; i4x4++){
3557
                    const int index= 16 + 4*chroma_idx + i4x4;
3558
                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, chroma_qp, 15) < 0){
3559
                        return -1;
3560
                    }
3561
                }
3562
            }
3563
        }else{
3564
            uint8_t * const nnz= &h->non_zero_count_cache[0];
3565
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
3566
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
3567
        }
3568
    }else{
3569
        memset(&h->non_zero_count_cache[8], 0, 8*5);
3570
    }
3571
    write_back_non_zero_count(h);
3572

    
3573
    return 0;
3574
}
3575

    
3576
/**
 * Decodes the macroblocks of the current slice, starting at the current
 * (s->mb_x, s->mb_y) position, until the bitstream is used up or the last
 * macroblock row has been finished.
 *
 * The decoded range is reported through ff_er_add_slice() on every exit.
 *
 * @returns 0 if the slice ended exactly at the end of the bitstream,
 *          -1 if a macroblock failed to decode or the end did not match
 */
static int decode_slice(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;

    s->mb_skip_run= -1;

    for(;;){
        int ret= decode_mb(h);

        // only reconstruct macroblocks which were parsed successfully;
        // after an error decode_mb() leaves e.g. negative prediction modes
        if(ret>=0)
            hl_decode_mb(h);

        if(ret>=0 && h->sps.mb_aff){ //FIXME optimal? or let mb_decode decode 16x32 ?
            // second macroblock of the pair, one row below
            s->mb_y++;
            ret= decode_mb(h);

            if(ret>=0)
                hl_decode_mb(h);
            s->mb_y--;
        }

        if(ret<0){
            fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

            return -1;
        }

        if(++s->mb_x >= s->mb_width){
            s->mb_x=0;
            ff_draw_horiz_band(s, 16*s->mb_y, 16);
            if(++s->mb_y >= s->mb_height){
                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                if(get_bits_count(&s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return -1;
                }
            }
        }

        // all bits consumed and no skipped macroblocks pending: slice done
        if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
            if(get_bits_count(&s->gb) == s->gb.size_in_bits){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                return 0;
            }else{
                // read past the end of the slice data
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }
        }
    }

    return -1; //not reached
}
3681

    
3682
/**
 * Reads the start of the VUI parameters from s->gb.
 * Only the aspect ratio information is parsed, sps->sar_width and
 * sps->sar_height are set to 0 if it is not present. The syntax elements
 * listed in the disabled table below are not read.
 *
 * @param sps the sequence parameter set which receives the aspect ratio
 * @returns 0 if ok, -1 if the aspect ratio index is not supported
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    MpegEncContext * const s = &h->s;
    int aspect_ratio_info_present_flag, aspect_ratio_idc;

    aspect_ratio_info_present_flag= get_bits1(&s->gb);

    if( !aspect_ratio_info_present_flag ){
        /* nothing coded */
        sps->sar_width=
        sps->sar_height= 0;
    }else{
        aspect_ratio_idc= get_bits(&s->gb, 8);

        if( aspect_ratio_idc == EXTENDED_SAR ){
            /* explicitly coded 16 bit width and height */
            sps->sar_width= get_bits(&s->gb, 16);
            sps->sar_height= get_bits(&s->gb, 16);
        }else if( aspect_ratio_idc >= 16 ){
            fprintf(stderr, "illegal aspect ratio\n");
            return -1;
        }else{
            /* table lookup */
            sps->sar_width=  pixel_aspect[aspect_ratio_idc][0];
            sps->sar_height= pixel_aspect[aspect_ratio_idc][1];
        }
    }
//            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
#if 0
| overscan_info_present_flag                        |0  |u(1)    |
| if( overscan_info_present_flag )                  |   |        |
|  overscan_appropriate_flag                        |0  |u(1)    |
| video_signal_type_present_flag                    |0  |u(1)    |
| if( video_signal_type_present_flag ) {            |   |        |
|  video_format                                     |0  |u(3)    |
|  video_full_range_flag                            |0  |u(1)    |
|  colour_description_present_flag                  |0  |u(1)    |
|  if( colour_description_present_flag ) {          |   |        |
|   colour_primaries                                |0  |u(8)    |
|   transfer_characteristics                        |0  |u(8)    |
|   matrix_coefficients                             |0  |u(8)    |
|  }                                                |   |        |
| }                                                 |   |        |
| chroma_location_info_present_flag                 |0  |u(1)    |
| if ( chroma_location_info_present_flag ) {        |   |        |
|  chroma_sample_location_type_top_field            |0  |ue(v)   |
|  chroma_sample_location_type_bottom_field         |0  |ue(v)   |
| }                                                 |   |        |
| timing_info_present_flag                          |0  |u(1)    |
| if( timing_info_present_flag ) {                  |   |        |
|  num_units_in_tick                                |0  |u(32)   |
|  time_scale                                       |0  |u(32)   |
|  fixed_frame_rate_flag                            |0  |u(1)    |
| }                                                 |   |        |
| nal_hrd_parameters_present_flag                   |0  |u(1)    |
| if( nal_hrd_parameters_present_flag  = =  1)      |   |        |
|  hrd_parameters( )                                |   |        |
| vcl_hrd_parameters_present_flag                   |0  |u(1)    |
| if( vcl_hrd_parameters_present_flag  = =  1)      |   |        |
|  hrd_parameters( )                                |   |        |
| if( ( nal_hrd_parameters_present_flag  = =  1  | ||   |        |
|                                                   |   |        |
|( vcl_hrd_parameters_present_flag  = =  1 ) )      |   |        |
|  low_delay_hrd_flag                               |0  |u(1)    |
| bitstream_restriction_flag                        |0  |u(1)    |
| if( bitstream_restriction_flag ) {                |0  |u(1)    |
|  motion_vectors_over_pic_boundaries_flag          |0  |u(1)    |
|  max_bytes_per_pic_denom                          |0  |ue(v)   |
|  max_bits_per_mb_denom                            |0  |ue(v)   |
|  log2_max_mv_length_horizontal                    |0  |ue(v)   |
|  log2_max_mv_length_vertical                      |0  |ue(v)   |
|  num_reorder_frames                               |0  |ue(v)   |
|  max_dec_frame_buffering                          |0  |ue(v)   |
| }                                                 |   |        |
|}                                                  |   |        |
#endif
    return 0;
}
3755

    
3756
/**
 * Parses a sequence parameter set from s->gb and stores it in
 * h->sps_buffer[] at the index given by the coded sps_id.
 *
 * @returns 0 if ok, -1 if sps_id, the POC type or the POC cycle length
 *          is out of range
 */
static inline int decode_seq_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int profile_idc, level_idc, multiple_slice_groups, arbitrary_slice_order, redundant_slices;
    int sps_id, i;
    SPS *sps;

    profile_idc= get_bits(&s->gb, 8);
    level_idc= get_bits(&s->gb, 8);
    multiple_slice_groups= get_bits1(&s->gb);
    arbitrary_slice_order= get_bits1(&s->gb);
    redundant_slices= get_bits1(&s->gb);

    sps_id= get_ue_golomb(&s->gb);

    // sps_id comes straight from the bitstream and is used as an array index.
    // NOTE(review): presumably sps_buffer has MAX_SPS_COUNT entries - confirm
    // against the H264Context definition.
    if((unsigned)sps_id >= MAX_SPS_COUNT){
        fprintf(stderr, "sps_id %d out of range\n", sps_id);
        return -1;
    }

    sps= &h->sps_buffer[ sps_id ];

    sps->profile_idc= profile_idc;
    sps->level_idc= level_idc;
    sps->multiple_slice_groups= multiple_slice_groups;
    sps->arbitrary_slice_order= arbitrary_slice_order;
    sps->redundant_slices= redundant_slices;

    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;

    sps->poc_type= get_ue_golomb(&s->gb);

    if(sps->poc_type == 0){ //FIXME #define
        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
    } else if(sps->poc_type == 1){//FIXME #define
        int poc_cycle_length;

        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);

        // The cycle length bounds the writes into offset_for_ref_frame[].
        // 255 is the largest value H.264 allows for this syntax element.
        // NOTE(review): assumes offset_for_ref_frame[] has room for that
        // many entries - confirm against the SPS definition.
        poc_cycle_length= get_ue_golomb(&s->gb);
        if((unsigned)poc_cycle_length > 255){
            fprintf(stderr, "poc_cycle_length %d out of range\n", poc_cycle_length);
            return -1;
        }
        sps->poc_cycle_length= poc_cycle_length;

        for(i=0; i<poc_cycle_length; i++)
            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
    }
    if(sps->poc_type > 2){
        fprintf(stderr, "illegal POC type %d\n", sps->poc_type);
        return -1;
    }

    sps->ref_frame_count= get_ue_golomb(&s->gb);
    sps->required_frame_num_update_behaviour_flag= get_bits1(&s->gb);
    sps->mb_width= get_ue_golomb(&s->gb) + 1;
    sps->mb_height= get_ue_golomb(&s->gb) + 1;
    sps->frame_mbs_only_flag= get_bits1(&s->gb);
    // the MB-AFF flag is only coded if the sequence is not frame only
    if(!sps->frame_mbs_only_flag)
        sps->mb_aff= get_bits1(&s->gb);
    else
        sps->mb_aff= 0;

    sps->direct_8x8_inference_flag= get_bits1(&s->gb);

    sps->vui_parameters_present_flag= get_bits1(&s->gb);
    // NOTE(review): a failure of decode_vui_parameters() is not propagated,
    // the SPS is accepted anyway
    if( sps->vui_parameters_present_flag )
        decode_vui_parameters(h, sps);

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        printf("sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s %s\n", 
               sps_id, sps->profile_idc, sps->level_idc,
               sps->poc_type,
               sps->ref_frame_count,
               sps->mb_width, sps->mb_height,
               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
               sps->direct_8x8_inference_flag ? "8B8" : "",
               sps->vui_parameters_present_flag ? "VUI" : ""
               );
    }
    return 0;
}
3827

    
3828
/**
 * Parses a picture parameter set from the bit reader h->s.gb and stores it
 * in the h->pps_buffer[] slot selected by the coded pps id.
 *
 * Slice group (FMO) map syntax is not parsed: when more than one slice group
 * is signalled only the map type is read and a warning is printed, so the
 * fields that follow are read from the wrong bit position for such streams.
 *
 * @param h decoder context; its bit reader must be positioned at the start
 *          of the PPS payload
 * @return 0 on success, -1 if the pps id, the sps id or one of the reference
 *         counts is out of range
 */
static inline int decode_picture_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int pps_id= get_ue_golomb(&s->gb);
    int sps_id;
    PPS *pps;

    /* pps_id comes straight from the bitstream and is used as an array index;
       the unsigned compare also rejects a negative value.
       NOTE(review): assumes pps_buffer[] holds MAX_PPS_COUNT entries (its
       declaration is outside this view) */
    if((unsigned)pps_id >= MAX_PPS_COUNT){
        fprintf(stderr, "pps_id (%d) out of range\n", pps_id);
        return -1;
    }

    /* validate before touching the slot so a bad id never gets stored.
       NOTE(review): assumes MAX_SPS_COUNT bounds the valid sps ids */
    sps_id= get_ue_golomb(&s->gb);
    if((unsigned)sps_id >= MAX_SPS_COUNT){
        fprintf(stderr, "sps_id (%d) out of range\n", sps_id);
        return -1;
    }

    pps= &h->pps_buffer[pps_id];

    pps->sps_id= sps_id;
    pps->cabac= get_bits1(&s->gb);
    pps->pic_order_present= get_bits1(&s->gb);
    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
    if(pps->slice_group_count > 1 ){
        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
fprintf(stderr, "FMO not supported\n");
        switch(pps->mb_slice_group_map_type){
        case 0:
            /* not parsed:
               for( i = 0; i <= num_slice_groups_minus1; i++ )
                   run_length[ i ]                    ue(v) */
            break;
        case 2:
            /* not parsed:
               for( i = 0; i < num_slice_groups_minus1; i++ ){
                   top_left_mb[ i ]                   ue(v)
                   bottom_right_mb[ i ]               ue(v)
               } */
            break;
        case 3:
        case 4:
        case 5:
            /* not parsed:
               slice_group_change_direction_flag      u(1)
               slice_group_change_rate_minus1         ue(v) */
            break;
        case 6:
            /* not parsed:
               slice_group_id_cnt_minus1              ue(v)
               for( i = 0; i <= slice_group_id_cnt_minus1; i++ )
                   slice_group_id[ i ]                u(v) */
            break;
        }
    }
    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
    if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
        fprintf(stderr, "reference overflow (pps)\n");
        return -1;
    }

    pps->weighted_pred= get_bits1(&s->gb);
    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
    /* qp and qs are coded as signed offsets from 26 */
    pps->init_qp= get_se_golomb(&s->gb) + 26;
    pps->init_qs= get_se_golomb(&s->gb) + 26;
    pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
    pps->constrained_intra_pred= get_bits1(&s->gb);
    pps->redundant_pic_cnt_present = get_bits1(&s->gb);
    pps->crop= get_bits1(&s->gb);
    if(pps->crop){
        pps->crop_left  = get_ue_golomb(&s->gb);
        pps->crop_right = get_ue_golomb(&s->gb);
        pps->crop_top   = get_ue_golomb(&s->gb);
        pps->crop_bottom= get_ue_golomb(&s->gb);
    }else{
        pps->crop_left  =
        pps->crop_right =
        pps->crop_top   =
        pps->crop_bottom= 0;
    }

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        printf("pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s crop:%d/%d/%d/%d\n", 
               pps_id, pps->sps_id,
               pps->cabac ? "CABAC" : "CAVLC",
               pps->slice_group_count,
               pps->ref_count[0], pps->ref_count[1],
               pps->weighted_pred ? "weighted" : "",
               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
               pps->deblocking_filter_parameters_present ? "LPAR" : "",
               pps->constrained_intra_pred ? "CONSTR" : "",
               pps->redundant_pic_cnt_present ? "REDU" : "",
               pps->crop_left, pps->crop_right,
               pps->crop_top, pps->crop_bottom
               );
    }

    return 0;
}
3920

    
3921
/**
3922
 * finds the end of the current frame in the bitstream.
3923
 * @return the position of the first byte of the next frame, or -1
3924
 */
3925
static int find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
3926
    ParseContext *pc= &s->parse_context;
3927
    int i;
3928
    uint32_t state;
3929
//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
3930
//    mb_addr= pc->mb_addr - 1;
3931
    state= pc->state;
3932
    //FIXME this will fail with slices
3933
    for(i=0; i<buf_size; i++){
3934
        state= (state<<8) | buf[i];
3935
        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
3936
            if(pc->frame_start_found){
3937
                pc->state=-1; 
3938
                pc->frame_start_found= 0;
3939
                return i-3;
3940
            }
3941
            pc->frame_start_found= 1;
3942
        }
3943
    }
3944
    
3945
    pc->state= state;
3946
    return END_NOT_FOUND;
3947
}
3948

    
3949
/**
 * Walks buf, locating each 00 00 01 start code, unpacking the NAL unit that
 * follows with decode_nal() and dispatching on h->nal_unit_type.
 *
 * After the last NAL unit, if a current picture exists, the frame number /
 * POC history is updated, reference picture marking is executed for
 * reference pictures and the frame is finished.
 *
 * @param h        decoder context
 * @param buf      input bytes containing start code prefixed NAL units
 * @param buf_size number of bytes in buf
 * @return the buffer index reached, or -1 if a NAL unit could not be
 *         unpacked or a slice header failed to decode
 */
static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int buf_index=0;

    for(;;){
        int consumed;
        int dst_length;
        int bit_length;
        uint8_t *ptr;

        // start code prefix search
        for(; buf_index + 3 < buf_size; buf_index++){
            // this should always succeed in the first iteration
            if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                break;
        }

        if(buf_index+3 >= buf_size) break;

        buf_index+=3;

        ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, buf_size - buf_index);
        /* an empty or missing payload would make ptr[dst_length - 1] below
           read in front of the buffer */
        if(ptr == NULL || dst_length <= 0)
            return -1;
        /* drop one trailing zero byte, but always keep at least one byte so
           decode_rbsp_trailing() is handed a pointer inside the payload */
        if(dst_length > 1 && ptr[dst_length - 1] == 0) dst_length--;
        bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);

        if(s->avctx->debug&FF_DEBUG_STARTCODE){
            printf("NAL %d at %d length %d\n", h->nal_unit_type, buf_index, dst_length);
        }

        buf_index += consumed;

        /* skip NAL units whose nal_ref_idc is below the hurry_up level */
        if(h->nal_ref_idc < s->hurry_up)
            continue;

        switch(h->nal_unit_type){
        case NAL_IDR_SLICE:
            idr(h); //FIXME ensure we dont loose some frames if there is reordering
            /* fallthrough: an IDR slice is then decoded like any other slice */
        case NAL_SLICE:
            init_get_bits(&s->gb, ptr, bit_length);
            /* unpartitioned slice: intra and inter data share the main reader */
            h->intra_gb_ptr=
            h->inter_gb_ptr= &s->gb;
            s->data_partitioning = 0;

            if(decode_slice_header(h) < 0) return -1;
            if(h->redundant_pic_count==0)
                decode_slice(h);
            break;
        case NAL_DPA:
            init_get_bits(&s->gb, ptr, bit_length);
            /* partition A carries the header; B and C supply the readers later */
            h->intra_gb_ptr=
            h->inter_gb_ptr= NULL;
            s->data_partitioning = 1;

            if(decode_slice_header(h) < 0) return -1;
            break;
        case NAL_DPB:
            init_get_bits(&h->intra_gb, ptr, bit_length);
            h->intra_gb_ptr= &h->intra_gb;
            break;
        case NAL_DPC:
            init_get_bits(&h->inter_gb, ptr, bit_length);
            h->inter_gb_ptr= &h->inter_gb;

            /* the slice is only decoded once partitions A and B were seen */
            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning)
                decode_slice(h);
            break;
        case NAL_SEI:
            break;
        case NAL_SPS:
            init_get_bits(&s->gb, ptr, bit_length);
            decode_seq_parameter_set(h);

            if(s->flags& CODEC_FLAG_LOW_DELAY)
                s->low_delay=1;

            avctx->has_b_frames= !s->low_delay;
            break;
        case NAL_PPS:
            init_get_bits(&s->gb, ptr, bit_length);

            decode_picture_parameter_set(h);

            break;
        case NAL_PICTURE_DELIMITER:
            break;
        case NAL_FILTER_DATA:
            break;
        default:
            /* any other NAL unit type is ignored */
            break;
        }

        //FIXME move after where irt is set
        s->current_picture.pict_type= s->pict_type;
        s->current_picture.key_frame= s->pict_type == I_TYPE;
    }

    if(!s->current_picture_ptr) return buf_index; //no frame

    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num= h->frame_num;
    if(s->current_picture_ptr->reference){
        h->prev_poc_msb= h->poc_msb;
        h->prev_poc_lsb= h->poc_lsb;
        execute_ref_pic_marking(h, h->mmco, h->mmco_index);
    }else{
        /* non reference pictures must not carry marking operations */
        assert(h->mmco_index==0);
    }

    ff_er_frame_end(s);
    MPV_frame_end(s);

    return buf_index;
}
4067

    
4068
/**
4069
 * retunrs the number of bytes consumed for building the current frame
4070
 */
4071
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
4072
    if(s->flags&CODEC_FLAG_TRUNCATED){
4073
        pos -= s->parse_context.last_index;
4074
        if(pos<0) pos=0; // FIXME remove (uneeded?)
4075
        
4076
        return pos;
4077
    }else{
4078
        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
4079
        if(pos+10>buf_size) pos=buf_size; // oops ;)
4080

    
4081
        return pos;
4082
    }
4083
}
4084

    
4085
/**
 * Decodes the NAL units in buf and hands the current picture to the caller.
 *
 * With CODEC_FLAG_TRUNCATED the input is first run through find_frame_end()
 * and ff_combine_frame(); if ff_combine_frame() reports a negative result,
 * the whole buffer is reported as consumed and no picture is output. On the
 * first picture any extradata is decoded before the buffer itself.
 *
 * @param avctx     codec context; priv_data is the H264Context
 * @param data      AVFrame that receives a copy of the current picture
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @param buf       input bytes
 * @param buf_size  number of bytes in buf
 * @return number of bytes consumed, 0 for an empty buffer, or -1 on error
 */
static int decode_frame(AVCodecContext *avctx, 
                             void *data, int *data_size,
                             uint8_t *buf, int buf_size)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;

    *data_size = 0;

   /* no supplementary picture */
    if (buf_size == 0) {
        return 0;
    }

    if(s->flags&CODEC_FLAG_TRUNCATED){
        int next= find_frame_end(s, buf, buf_size);

        if( ff_combine_frame(s, next, &buf, &buf_size) < 0 )
            return buf_size;
//printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
    }

    if(s->avctx->extradata_size && s->picture_number==0){
        /* decode_nal_units() returns the (non negative) buffer index on
           success, so only a negative result signals failure */
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
            return -1;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    //FIXME do something with unavailable reference frames

//    if(ret==FRAME_SKIPED) return get_consumed_bytes(s, buf_index, buf_size);
#if 0
    if(s->pict_type==B_TYPE || s->low_delay){
        *pict= *(AVFrame*)&s->current_picture;
    } else {
        *pict= *(AVFrame*)&s->last_picture;
    }
#endif
    if(!s->current_picture_ptr){
        fprintf(stderr, "error, NO frame\n");
        return -1;
    }

    *pict= *(AVFrame*)&s->current_picture; //FIXME
    ff_print_debug_info(s, s->current_picture_ptr);
    assert(pict->data[0]);
//printf("out %d\n", (int)pict->data[0]);
#if 0 //?

    /* Return the Picture timestamp as the frame number */
    /* we substract 1 because it is added on utils.c    */
    avctx->frame_number = s->picture_number - 1;
#endif
#if 0
    /* dont output the last pic after seeking */
    if(s->last_picture_ptr || s->low_delay)
    //Note this isnt a issue as a IDR pic should flush the buffers
#endif
        *data_size = sizeof(AVFrame);
    return get_consumed_bytes(s, buf_index, buf_size);
}
4153
#if 0
/**
 * Fills h->mb_avail[] for the macroblock at (s->mb_x, s->mb_y).
 * Entries 0..2 describe the top-left, top and top-right neighbours and
 * entry 3 the left neighbour; a neighbour counts as available when it lies
 * inside the picture and its slice_table entry equals h->slice_num.
 * Entries 4 and 5 are constants.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy= mb_xy - s->mb_stride;

    /* the row above does not exist for the first macroblock row */
    h->mb_avail[0]= 0;
    h->mb_avail[1]= 0;
    h->mb_avail[2]= 0;
    if(s->mb_y != 0){
        h->mb_avail[0]= s->mb_x != 0
                        && h->slice_table[top_xy - 1] == h->slice_num;
        h->mb_avail[1]= h->slice_table[top_xy] == h->slice_num;
        h->mb_avail[2]= s->mb_x+1 < s->mb_width
                        && h->slice_table[top_xy + 1] == h->slice_num;
    }

    h->mb_avail[3]= s->mb_x != 0 && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
4172

    
4173
#if 0 //selftest
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Disabled self test. Round-trips unsigned and signed exp golomb codes
 * through a bit buffer, measures the error of the 4x4 DCT/IDCT pair on
 * random blocks and round-trips random payloads through encode_nal() /
 * decode_nal().
 *
 * @return 0 if the NAL layer test passes, -1 on its first failure; golomb
 *         mismatches and the DCT error are only printed
 */
int main(){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE, NULL, NULL);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* remember the upcoming bits so a mismatch can be reported with them */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE, NULL, NULL);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value written was i - COUNT/2, so that is what to expect */
            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        h264_add_idct_c(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        /* accumulate squared and maximum reconstruction error */
        for(j=0; j<16; j++){
            int diff= ABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
#if 0
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
#endif
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        /* start from a payload without any zero byte ... */
        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        /* ... then clear exactly `zeros` distinct positions */
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL");

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("missmatch\n");
            return -1;
        }
    }

    printf("Testing RBSP\n");


    return 0;
}
#endif
4345

    
4346

    
4347
/**
 * Tears the decoder down: releases the tables owned by the H264Context and
 * then shuts down the embedded MpegEncContext.
 *
 * @param avctx codec context; priv_data is the H264Context
 * @return always 0
 */
static int decode_end(AVCodecContext *avctx)
{
    H264Context * const h = avctx->priv_data;

    free_tables(h); //FIXME cleanup init stuff perhaps
    MPV_common_end(&h->s);

    return 0;
}
4359

    
4360

    
4361
AVCodec h264_decoder = {
4362
    "h264",
4363
    CODEC_TYPE_VIDEO,
4364
    CODEC_ID_H264,
4365
    sizeof(H264Context),
4366
    decode_init,
4367
    NULL,
4368
    decode_end,
4369
    decode_frame,
4370
    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
4371
};
4372