Statistics
| Branch: | Revision:

ffmpeg / libavcodec / h264.c @ 2ca71015

History | View | Annotate | Download (150 KB)

1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 *
19
 */
20
 
21
/**
22
 * @file h264.c
23
 * H.264 / AVC / MPEG4 part10 codec.
24
 * @author Michael Niedermayer <michaelni@gmx.at>
25
 */
26

    
27
#include "common.h"
28
#include "dsputil.h"
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31
#include "h264data.h"
32
#include "golomb.h"
33

    
34
#undef NDEBUG
35
#include <assert.h>
36

    
37
/*
 * Redirect these two identifiers to deliberately descriptive names, so any
 * use of them in this file expands to the replacement (presumably a name that
 * does not exist, turning accidental use into a compile error -- confirm).
 */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Indices reserved for the luma and chroma DC blocks. */
#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit counts used for the individual VLC tables. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/* Capacities of the parameter set buffers. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* Capacity of the memory management control operation buffer. */
#define MAX_MMCO_COUNT 66
54

    
55
/**
 * Sequence parameter set.
 * Where a member is annotated, the annotation names the bitstream syntax
 * element it corresponds to.
 */
typedef struct SPS{
    int profile_idc;
    int level_idc;
    int multiple_slice_groups;         ///< more_than_one_slice_group_allowed_flag
    int arbitrary_slice_order;         ///< arbitrary_slice_order_allowed_flag
    int redundant_slices;              ///< redundant_slices_allowed_flag
    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
    int poc_type;                      ///< pic_order_cnt_type
    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
    int delta_pic_order_always_zero_flag;
    int offset_for_non_ref_pic;
    int offset_for_top_to_bottom_field;
    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
    int ref_frame_count;               ///< num_ref_frames
    int required_frame_num_update_behaviour_flag;
    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
    int frame_mbs_only_flag;
    int mb_aff;                        ///< mb_adaptive_frame_field_flag
    int direct_8x8_inference_flag;
    int vui_parameters_present_flag;
    int sar_width;
    int sar_height;
    short offset_for_ref_frame[256];   //FIXME allocate dynamically?
}SPS;
84

    
85
/**
 * Picture parameter set.
 * Annotations name the bitstream syntax element each member corresponds to.
 */
typedef struct PPS{
    int sps_id;
    int cabac;                                ///< entropy_coding_mode_flag
    int pic_order_present;                    ///< pic_order_present_flag
    int slice_group_count;                    ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];                         ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;                        ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                              ///< pic_init_qp_minus26 + 26
    int init_qs;                              ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred;               ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present;            ///< redundant_pic_cnt_present_flag
    int crop;                                 ///< frame_cropping_flag
    int crop_left;                            ///< frame_cropping_rect_left_offset
    int crop_right;                           ///< frame_cropping_rect_right_offset
    int crop_top;                             ///< frame_cropping_rect_top_offset
    int crop_bottom;                          ///< frame_cropping_rect_bottom_offset
}PPS;
109

    
110
/**
 * Memory management control operation opcode.
 * The numeric values are consecutive, starting at 0 for MMCO_END.
 */
typedef enum MMCOOpcode{
    MMCO_END          = 0,
    MMCO_SHORT2UNUSED = 1,
    MMCO_LONG2UNUSED  = 2,
    MMCO_SHORT2LONG   = 3,
    MMCO_SET_MAX_LONG = 4,
    MMCO_RESET        = 5,
    MMCO_LONG         = 6,
} MMCOOpcode;
122

    
123
/**
124
 * Memory management control operation.
125
 */
126
typedef struct MMCO{
127
    MMCOOpcode opcode;
128
    int short_frame_num;
129
    int long_index;
130
} MMCO;
131

    
132
/**
133
 * H264Context
134
 */
135
typedef struct H264Context{
136
    MpegEncContext s;
137
    int nal_ref_idc;        
138
    int nal_unit_type;
139
#define NAL_SLICE                1
140
#define NAL_DPA                        2
141
#define NAL_DPB                        3
142
#define NAL_DPC                        4
143
#define NAL_IDR_SLICE                5
144
#define NAL_SEI                        6
145
#define NAL_SPS                        7
146
#define NAL_PPS                        8
147
#define NAL_PICTURE_DELIMITER        9
148
#define NAL_FILTER_DATA                10
149
    uint8_t *rbsp_buffer;
150
    int rbsp_buffer_size;
151

    
152
    int chroma_qp; //QPc
153

    
154
    int prev_mb_skiped; //FIXME remove (IMHO not used)
155

    
156
    //prediction stuff
157
    int chroma_pred_mode;
158
    int intra16x16_pred_mode;
159
    
160
    int8_t intra4x4_pred_mode_cache[5*8];
161
    int8_t (*intra4x4_pred_mode)[8];
162
    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
163
    void (*pred8x8  [4+3])(uint8_t *src, int stride);
164
    void (*pred16x16[4+3])(uint8_t *src, int stride);
165
    unsigned int topleft_samples_available;
166
    unsigned int top_samples_available;
167
    unsigned int topright_samples_available;
168
    unsigned int left_samples_available;
169

    
170
    /**
171
     * non zero coeff count cache.
172
     * is 64 if not available.
173
     */
174
    uint8_t non_zero_count_cache[6*8];
175
    uint8_t (*non_zero_count)[16];
176

    
177
    /**
178
     * Motion vector cache.
179
     */
180
    int16_t mv_cache[2][5*8][2];
181
    int8_t ref_cache[2][5*8];
182
#define LIST_NOT_USED -1 //FIXME rename?
183
#define PART_NOT_AVAILABLE -2
184
    
185
    /**
186
     * is 1 if the specific list MV&references are set to 0,0,-2.
187
     */
188
    int mv_cache_clean[2];
189

    
190
    int block_offset[16+8];
191
    int chroma_subblock_offset[16]; //FIXME remove
192
    
193
    uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
194
    uint16_t *mb2b8_xy;
195
    int b_stride;
196
    int b8_stride;
197

    
198
    int halfpel_flag;
199
    int thirdpel_flag;
200

    
201
    int unknown_svq3_flag;
202
    int next_slice_index;
203

    
204
    SPS sps_buffer[MAX_SPS_COUNT];
205
    SPS sps; ///< current sps
206
    
207
    PPS pps_buffer[MAX_PPS_COUNT];
208
    /**
209
     * current pps
210
     */
211
    PPS pps; //FIXME move tp Picture perhaps? (->no) do we need that?
212

    
213
    int slice_num;
214
    uint8_t *slice_table_base;
215
    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
216
    int slice_type;
217
    int slice_type_fixed;
218
    
219
    //interlacing specific flags
220
    int mb_field_decoding_flag;
221
    
222
    int sub_mb_type[4];
223
    
224
    //POC stuff
225
    int poc_lsb;
226
    int poc_msb;
227
    int delta_poc_bottom;
228
    int delta_poc[2];
229
    int frame_num;
230
    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
231
    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
232
    int frame_num_offset;         ///< for POC type 2
233
    int prev_frame_num_offset;    ///< for POC type 2
234
    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
235

    
236
    /**
237
     * frame_num for frames or 2*frame_num for field pics.
238
     */
239
    int curr_pic_num;
240
    
241
    /**
242
     * max_frame_num or 2*max_frame_num for field pics.
243
     */
244
    int max_pic_num;
245

    
246
    //Weighted pred stuff
247
    int luma_log2_weight_denom;
248
    int chroma_log2_weight_denom;
249
    int luma_weight[2][16];
250
    int luma_offset[2][16];
251
    int chroma_weight[2][16][2];
252
    int chroma_offset[2][16][2];
253
   
254
    //deblock
255
    int disable_deblocking_filter_idc;
256
    int slice_alpha_c0_offset_div2;
257
    int slice_beta_offset_div2;
258
     
259
    int redundant_pic_count;
260
    
261
    int direct_spatial_mv_pred;
262

    
263
    /**
264
     * num_ref_idx_l0/1_active_minus1 + 1
265
     */
266
    int ref_count[2];// FIXME split for AFF
267
    Picture *short_ref[16];
268
    Picture *long_ref[16];
269
    Picture default_ref_list[2][32];
270
    Picture ref_list[2][32]; //FIXME size?
271
    Picture field_ref_list[2][32]; //FIXME size?
272
    
273
    /**
274
     * memory management control operations buffer.
275
     */
276
    MMCO mmco[MAX_MMCO_COUNT];
277
    int mmco_index;
278
    
279
    int long_ref_count;  ///< number of actual long term references
280
    int short_ref_count; ///< number of actual short term references
281
    
282
    //data partitioning
283
    GetBitContext intra_gb;
284
    GetBitContext inter_gb;
285
    GetBitContext *intra_gb_ptr;
286
    GetBitContext *inter_gb_ptr;
287
    
288
    DCTELEM mb[16*24] __align8;
289
}H264Context;
290

    
291
static VLC coeff_token_vlc[4];
292
static VLC chroma_dc_coeff_token_vlc;
293

    
294
static VLC total_zeros_vlc[15];
295
static VLC chroma_dc_total_zeros_vlc[3];
296

    
297
static VLC run_vlc[6];
298
static VLC run7_vlc;
299

    
300
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
301
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
302

    
303
/**
 * Packs two 16 bit values into one 32 bit word.
 * Without WORDS_BIGENDIAN a occupies the low 16 bits and b the high 16 bits,
 * with WORDS_BIGENDIAN it is the other way round (presumably so that a always
 * ends up at the lower address when the word is stored).
 * The shift is done on uint32_t: left shifting a negative int is undefined.
 * @param a first value, only its low 16 bits are used
 * @param b second value, only its low 16 bits are used
 * @return the packed word
 */
static inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return ((uint32_t)b&0xFFFF) + ((uint32_t)a<<16);
#else
   return ((uint32_t)a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
310

    
311
/**
 * Fills a rectangle with a constant value.
 * Only these sizes (width in bytes, i.e. w*size, by height) are handled:
 * 2x2, 2x4, 4x1, 4x2, 4x4, 8x1, 8x2, 8x4, 16x2 and 16x4; anything else
 * triggers an assertion failure.
 * @param vp pointer to the top left element of the rectangle
 * @param w width of the rectangle in elements, should be a constant
 * @param h height of the rectangle, should be a constant
 * @param stride distance between 2 rows in elements
 * @param val the value to store; with size 1 it is replicated by a
 *            multiplication, so it has to fit into 8 bits to give a clean fill
 * @param size the size of val in bytes (1 or 4), should be a constant
 */
static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t * const dst= (uint8_t*)vp;
    int y;

    assert(size==1 || size==4);

    //from here on the width and the stride are measured in bytes
    w      *= size;
    stride *= size;

//FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it
    if(w==2 && (h==2 || h==4)){
        const uint16_t v16= size==4 ? val : val*0x0101;

        for(y=0; y<h; y++)
            *(uint16_t*)(dst + y*stride)= v16;
    }else if(w==4 && (h==1 || h==2 || h==4)){
        const uint32_t v32= size==4 ? val : val*0x01010101;

        for(y=0; y<h; y++)
            *(uint32_t*)(dst + y*stride)= v32;
    }else if(w==8 && (h==1 || h==2)){
        //short 8 byte wide rectangles are written as pairs of 32 bit stores
        const uint32_t v32= size==4 ? val : val*0x01010101;

        for(y=0; y<h; y++){
            *(uint32_t*)(dst + 0 + y*stride)= v32;
            *(uint32_t*)(dst + 4 + y*stride)= v32;
        }
    }else if(w==8 && h==4){
        const uint64_t v64= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;

        for(y=0; y<h; y++)
            *(uint64_t*)(dst + y*stride)= v64;
    }else if(w==16 && (h==2 || h==4)){
        const uint64_t v64= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;

        for(y=0; y<h; y++){
            *(uint64_t*)(dst + 0 + y*stride)= v64;
            *(uint64_t*)(dst + 8 + y*stride)= v64;
        }
    }else
        assert(0);
}
373

    
374
/**
 * Loads the neighbour dependent caches of the current macroblock.
 * Looks at the top left, top, top right and left neighbours and fills
 * - the *_samples_available masks and, for intra 4x4 MBs, the border of
 *   intra4x4_pred_mode_cache (intra MBs only),
 * - the border of non_zero_count_cache (64 marks an unavailable block),
 * - the border of mv_cache / ref_cache for every list (inter MBs only).
 * A neighbour whose slice_table entry differs from the current slice_num is
 * handled like a missing one (type 0).
 * @param mb_type type of the current macroblock
 */
static inline void fill_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[4];
    int i;

    //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it 

    if(h->sps.mb_aff){
    //FIXME not implemented, the neighbour indices used below stay uninitialized for MBAFF
    }else{
        topleft_xy = mb_xy-1 - s->mb_stride;
        top_xy     = mb_xy   - s->mb_stride;
        topright_xy= mb_xy+1 - s->mb_stride;
        left_xy[0]   = mb_xy-1;
        left_xy[1]   = mb_xy-1;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
    }

    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        //start with everything available, then clear the bits of the missing neighbours
        h->topleft_samples_available= 
        h->top_samples_available= 
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        //a non intra neighbour is unusable if it is missing or if constrained intra pred is on
        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                //2 for usable non intra4x4 neighbours, -1 otherwise
                int pred;
                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
                    pred= 2;
                else{
                    pred= -1;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
                        pred= 2;
                    else{
                        pred= -1;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }


/*
layout of the border entries of non_zero_count_cache written below
(T = from the top MB, L = from the left MB)
0 . T T. T T T T 
1 L . .L . . . . 
2 L . .L . . . . 
3 . T TL . . . . 
4 L . .L . . . . 
5 L . .. . . . . 
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    }else{
        h->non_zero_count_cache[4+8*0]=      
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= 64;
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
        h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
        h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
    }else{
        h->non_zero_count_cache[3+8*1]= 
        h->non_zero_count_cache[3+8*2]= 
        h->non_zero_count_cache[0+8*1]= 
        h->non_zero_count_cache[0+8*4]= 64;
    }

    if(left_type[1]){
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
        h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
        h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
    }else{
        h->non_zero_count_cache[3+8*3]= 
        h->non_zero_count_cache[3+8*4]= 
        h->non_zero_count_cache[0+8*2]= 
        h->non_zero_count_cache[0+8*5]= 64;
    }

    if(IS_INTER(mb_type)){
        int list;
        for(list=0; list<2; list++){
            if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
                //the caches of an unused list are left untouched
                //FIXME clean them (and set mv_cache_clean)? direct mode ...
                continue;
            }
            h->mv_cache_clean[list]= 0;

            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                //sets all 4 ref_cache entries of the top row with one 32 bit store
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            //left column: i=0 covers cache rows 0-1 (left_block[0..1]), i=1 rows 2-3 (left_block[2..3])
            //each half uses its own left_type[i], also for the unavailable case
            for(i=0; i<2; i++){
                const int cache_idx= scan8[0] - 1 + 2*8*i;

                if(IS_INTER(left_type[i])){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx    ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+2*i]];
                    *(uint32_t*)h->mv_cache[list][cache_idx + 8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+2*i]];
                    h->ref_cache[list][cache_idx    ]= 
                    h->ref_cache[list][cache_idx + 8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+2*i]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx    ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx + 8]= 0;
                    h->ref_cache[list][cache_idx    ]=
                    h->ref_cache[list][cache_idx + 8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            h->ref_cache[list][scan8[5 ]+1] = 
            h->ref_cache[list][scan8[7 ]+1] = 
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewher else)
            h->ref_cache[list][scan8[4 ]] = 
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewher else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
        }
//FIXME

    }
}
620

    
621
/**
 * Stores the intra 4x4 prediction modes which later macroblocks need:
 * entries 0..3 are the right column of the cache (rows 1..4) and
 * entries 4..6 are columns 4..6 of the bottom row.
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    int8_t * const dst= h->intra4x4_pred_mode[s->mb_x + s->mb_y*s->mb_stride];
    const int8_t * const cache= h->intra4x4_pred_mode_cache;
    int i;

    for(i=0; i<4; i++)
        dst[i]= cache[7 + 8*(i+1)];

    for(i=0; i<3; i++)
        dst[4+i]= cache[4+i + 8*4];
}
633

    
634
/**
635
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
636
 */
637
static inline int check_intra4x4_pred_mode(H264Context *h){
638
    MpegEncContext * const s = &h->s;
639
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
640
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
641
    int i;
642
    
643
    if(!(h->top_samples_available&0x8000)){
644
        for(i=0; i<4; i++){
645
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
646
            if(status<0){
647
                fprintf(stderr, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
648
                return -1;
649
            } else if(status){
650
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
651
            }
652
        }
653
    }
654
    
655
    if(!(h->left_samples_available&0x8000)){
656
        for(i=0; i<4; i++){
657
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
658
            if(status<0){
659
                fprintf(stderr, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
660
                return -1;
661
            } else if(status){
662
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
663
            }
664
        }
665
    }
666

    
667
    return 0;
668
} //FIXME cleanup like next
669

    
670
/**
671
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
672
 */
673
static inline int check_intra_pred_mode(H264Context *h, int mode){
674
    MpegEncContext * const s = &h->s;
675
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
676
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
677
    
678
    if(!(h->top_samples_available&0x8000)){
679
        mode= top[ mode ];
680
        if(mode<0){
681
            fprintf(stderr, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
682
            return -1;
683
        }
684
    }
685
    
686
    if(!(h->left_samples_available&0x8000)){
687
        mode= left[ mode ];
688
        if(mode<0){
689
            fprintf(stderr, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
690
            return -1;
691
        } 
692
    }
693

    
694
    return mode;
695
}
696

    
697
/**
698
 * gets the predicted intra4x4 prediction mode.
699
 */
700
static inline int pred_intra_mode(H264Context *h, int n){
701
    const int index8= scan8[n];
702
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
703
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
704
    const int min= FFMIN(left, top);
705

    
706
    tprintf("mode:%d %d min:%d\n", left ,top, min);
707

    
708
    if(min<0) return DC_PRED;
709
    else      return min;
710
}
711

    
712
/**
 * Copies the non zero coefficient counts which later macroblocks need from
 * the cache into the per macroblock array.
 */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    uint8_t * const dst= h->non_zero_count[s->mb_x + s->mb_y*s->mb_stride];
    const uint8_t * const nnz= h->non_zero_count_cache;
    int i;

    //entries 0..3: cache row 4, columns 4..7
    for(i=0; i<4; i++)
        dst[i]= nnz[4+i + 8*4];

    //entries 4..6: cache column 7, rows 3 down to 1
    for(i=0; i<3; i++)
        dst[4+i]= nnz[7 + 8*(3-i)];

    dst[7]= nnz[1+8*2];
    dst[8]= nnz[2+8*2];
    dst[9]= nnz[2+8*1];

    dst[10]= nnz[1+8*5];
    dst[11]= nnz[2+8*5];
    dst[12]= nnz[2+8*4];
}
732

    
733
/**
734
 * gets the predicted number of non zero coefficients.
735
 * @param n block index
736
 */
737
static inline int pred_non_zero_count(H264Context *h, int n){
738
    const int index8= scan8[n];
739
    const int left= h->non_zero_count_cache[index8 - 1];
740
    const int top = h->non_zero_count_cache[index8 - 8];
741
    int i= left + top;
742
    
743
    if(i<64) i= (i+1)>>1;
744

    
745
    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
746

    
747
    return i&31;
748
}
749

    
750
/**
 * Fetches the diagonal neighbour used for motion vector prediction.
 * The top-right neighbour is used when its reference is available,
 * otherwise the top-left neighbour is used instead.
 * @param C receives a pointer to the chosen neighbour's motion vector
 * @param i cache position of the partition's first block
 * @param part_width partition width in cache columns
 * @returns the reference index cached for the chosen neighbour
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_pos= i - 8 + part_width;
    const int topleft_pos = i - 8 - 1;
    const int topright_ref= h->ref_cache[list][topright_pos];

    if(topright_ref == PART_NOT_AVAILABLE){
        tprintf("topright MV not available\n");

        *C= h->mv_cache[list][topleft_pos];
        return h->ref_cache[list][topleft_pos];
    }

    *C= h->mv_cache[list][topright_pos];
    return topright_ref;
}
763

    
764
/**
765
 * gets the predicted MV.
766
 * @param n the block index
767
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
768
 * @param mx the x component of the predicted motion vector
769
 * @param my the y component of the predicted motion vector
770
 */
771
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
772
    const int index8= scan8[n];
773
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
774
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
775
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
776
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
777
    const int16_t * C;
778
    int diagonal_ref, match_count;
779

    
780
    assert(part_width==1 || part_width==2 || part_width==4);
781

    
782
/* mv_cache
783
  B . . A T T T T 
784
  U . . L . . , .
785
  U . . L . . . .
786
  U . . L . . , .
787
  . . . L . . . .
788
*/
789

    
790
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
791
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
792
    
793
    if(match_count > 1){ //most common
794
        *mx= mid_pred(A[0], B[0], C[0]);
795
        *my= mid_pred(A[1], B[1], C[1]);
796
    }else if(match_count==1){
797
        if(left_ref==ref){
798
            *mx= A[0];
799
            *my= A[1];        
800
        }else if(top_ref==ref){
801
            *mx= B[0];
802
            *my= B[1];        
803
        }else{
804
            *mx= C[0];
805
            *my= C[1];        
806
        }
807
    }else{
808
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
809
            *mx= A[0];
810
            *my= A[1];        
811
        }else{
812
            *mx= mid_pred(A[0], B[0], C[0]);
813
            *my= mid_pred(A[1], B[1], C[1]);
814
        }
815
    }
816
        
817
    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
818
}
819

    
820
/**
821
 * gets the directionally predicted 16x8 MV.
822
 * @param n the block index
823
 * @param mx the x component of the predicted motion vector
824
 * @param my the y component of the predicted motion vector
825
 */
826
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
827
    if(n==0){
828
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
829
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
830

    
831
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
832
        
833
        if(top_ref == ref){
834
            *mx= B[0];
835
            *my= B[1];
836
            return;
837
        }
838
    }else{
839
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
840
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
841
        
842
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
843

    
844
        if(left_ref == ref){
845
            *mx= A[0];
846
            *my= A[1];
847
            return;
848
        }
849
    }
850

    
851
    //RARE
852
    pred_motion(h, n, 4, list, ref, mx, my);
853
}
854

    
855
/**
856
 * gets the directionally predicted 8x16 MV.
857
 * @param n the block index
858
 * @param mx the x component of the predicted motion vector
859
 * @param my the y component of the predicted motion vector
860
 */
861
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
862
    if(n==0){
863
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
864
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
865
        
866
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
867

    
868
        if(left_ref == ref){
869
            *mx= A[0];
870
            *my= A[1];
871
            return;
872
        }
873
    }else{
874
        const int16_t * C;
875
        int diagonal_ref;
876

    
877
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
878
        
879
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
880

    
881
        if(diagonal_ref == ref){ 
882
            *mx= C[0];
883
            *my= C[1];
884
            return;
885
        }
886
    }
887

    
888
    //RARE
889
    pred_motion(h, n, 2, list, ref, mx, my);
890
}
891

    
892
/**
 * Gets the motion vector of a skipped P macroblock.
 * The vector is zero when the top or left neighbour is unavailable, or when
 * either of them uses reference 0 with an all-zero vector; otherwise it is the
 * regular prediction for block 0, width 4, list 0, reference 0.
 * @param mx the x component of the resulting motion vector
 * @param my the y component of the resulting motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_pos = scan8[0] - 8;
    const int left_pos= scan8[0] - 1;
    const int top_ref = h->ref_cache[0][top_pos];
    const int left_ref= h->ref_cache[0][left_pos];
    int zero_mv= 0;

    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE)
        zero_mv= 1;
    else if(top_ref == 0  && *(uint32_t*)h->mv_cache[0][top_pos] == 0)  /* both components read as one word */
        zero_mv= 1;
    else if(left_ref == 0 && *(uint32_t*)h->mv_cache[0][left_pos] == 0)
        zero_mv= 1;

    if(zero_mv){
        *my= 0;
        *mx= 0;
    }else{
        pred_motion(h, 0, 4, 0, 0, mx, my);
    }
}
910

    
911
/**
 * Writes the motion vectors and reference indices of the current macroblock
 * from the caches back into the current picture.
 * For a list the macroblock does not use (and which is not 8x8 partitioned)
 * the 4x4 motion vectors are zeroed and the four 8x8 reference indices are
 * set to LIST_NOT_USED.
 * @param mb_type type of the current macroblock
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;   /* first 4x4 block of the MB */
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;  /* first 8x8 block of the MB */
    int list;

    for(list=0; list<2; list++){
        int y;
        if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
            if(1){ //FIXME skip or never read if mb_type doesnt use it
                for(y=0; y<4; y++){
                    /* each 64 bit store clears two adjacent vectors */
                    *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
                    *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
                }
                for(y=0; y<2; y++){
                    /* The marker belongs in ref_index (indexed per 8x8 block),
                       not in motion_val: writing it there corrupted a vector
                       and left the old reference indices in place. */
                    s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
                    s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
                }
            }
            continue; //FIXME direct mode ...
        }

        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        for(y=0; y<2; y++){
            /* one reference index per 8x8 block: every second cache column/row */
            s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
            s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
        }
    }
}
942

    
943
/**
944
 * Decodes a network abstraction layer unit.
945
 * @param consumed is the number of bytes used as input
946
 * @param length is the length of the array
947
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp ttailing?
948
 * @returns decoded bytes, might be src+1 if no escapes 
949
 */
950
static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
951
    int i, si, di;
952
    uint8_t *dst;
953

    
954
//    src[0]&0x80;                //forbidden bit
955
    h->nal_ref_idc= src[0]>>5;
956
    h->nal_unit_type= src[0]&0x1F;
957

    
958
    src++; length--;
959
#if 0    
960
    for(i=0; i<length; i++)
961
        printf("%2X ", src[i]);
962
#endif
963
    for(i=0; i+1<length; i+=2){
964
        if(src[i]) continue;
965
        if(i>0 && src[i-1]==0) i--;
966
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
967
            if(src[i+2]!=3){
968
                /* startcode, so we must be past the end */
969
                length=i;
970
            }
971
            break;
972
        }
973
    }
974

    
975
    if(i>=length-1){ //no escaped 0
976
        *dst_length= length;
977
        *consumed= length+1; //+1 for the header
978
        return src; 
979
    }
980

    
981
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
982
    dst= h->rbsp_buffer;
983

    
984
//printf("deoding esc\n");
985
    si=di=0;
986
    while(si<length){ 
987
        //remove escapes (very rare 1:2^22)
988
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
989
            if(src[si+2]==3){ //escape
990
                dst[di++]= 0;
991
                dst[di++]= 0;
992
                si+=3;
993
            }else //next start code
994
                break;
995
        }
996

    
997
        dst[di++]= src[si++];
998
    }
999

    
1000
    *dst_length= di;
1001
    *consumed= si + 1;//+1 for the header
1002
//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1003
    return dst;
1004
}
1005

    
1006
/**
1007
 * @param src the data which should be escaped
1008
 * @param dst the target buffer, dst+1 == src is allowed as a special case
1009
 * @param length the length of the src data
1010
 * @param dst_length the length of the dst array
1011
 * @returns length of escaped data in bytes or -1 if an error occured
1012
 */
1013
static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1014
    int i, escape_count, si, di;
1015
    uint8_t *temp;
1016
    
1017
    assert(length>=0);
1018
    assert(dst_length>0);
1019
    
1020
    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1021

    
1022
    if(length==0) return 1;
1023

    
1024
    escape_count= 0;
1025
    for(i=0; i<length; i+=2){
1026
        if(src[i]) continue;
1027
        if(i>0 && src[i-1]==0) 
1028
            i--;
1029
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1030
            escape_count++;
1031
            i+=2;
1032
        }
1033
    }
1034
    
1035
    if(escape_count==0){ 
1036
        if(dst+1 != src)
1037
            memcpy(dst+1, src, length);
1038
        return length + 1;
1039
    }
1040
    
1041
    if(length + escape_count + 1> dst_length)
1042
        return -1;
1043

    
1044
    //this should be damn rare (hopefully)
1045

    
1046
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1047
    temp= h->rbsp_buffer;
1048
//printf("encoding esc\n");
1049
    
1050
    si= 0;
1051
    di= 0;
1052
    while(si < length){
1053
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1054
            temp[di++]= 0; si++;
1055
            temp[di++]= 0; si++;
1056
            temp[di++]= 3; 
1057
            temp[di++]= src[si++];
1058
        }
1059
        else
1060
            temp[di++]= src[si++];
1061
    }
1062
    memcpy(dst+1, temp, length+escape_count);
1063
    
1064
    assert(di == length+escape_count);
1065
    
1066
    return di + 1;
1067
}
1068

    
1069
/**
1070
 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1071
 */
1072
static void encode_rbsp_trailing(PutBitContext *pb){
1073
    int length;
1074
    put_bits(pb, 1, 1);
1075
    length= (-get_bit_count(pb))&7;
1076
    if(length) put_bits(pb, length, 0);
1077
}
1078

    
1079
/**
 * Identifies the exact end of the bitstream by locating the lowest set bit of
 * the given byte.
 * @param src pointer to the byte to examine
 * @return the length of the trailing in bits (1..8), or 0 if damaged (the
 *         byte contains no set bit)
 */
static int decode_rbsp_trailing(uint8_t *src){
    const int last_byte= *src;
    int bit;

    tprintf("rbsp trailing %X\n", last_byte);

    for(bit=0; bit<8; bit++){
        if((last_byte >> bit) & 1)
            return bit + 1;
    }
    return 0;
}
1095

    
1096
/**
1097
 * idct tranforms the 16 dc values and dequantize them.
1098
 * @param qp quantization parameter
1099
 */
1100
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1101
    const int qmul= dequant_coeff[qp][0];
1102
#define stride 16
1103
    int i;
1104
    int temp[16]; //FIXME check if this is a good idea
1105
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1106
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1107

    
1108
//memset(block, 64, 2*256);
1109
//return;
1110
    for(i=0; i<4; i++){
1111
        const int offset= y_offset[i];
1112
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1113
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1114
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1115
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1116

    
1117
        temp[4*i+0]= z0+z3;
1118
        temp[4*i+1]= z1+z2;
1119
        temp[4*i+2]= z1-z2;
1120
        temp[4*i+3]= z0-z3;
1121
    }
1122

    
1123
    for(i=0; i<4; i++){
1124
        const int offset= x_offset[i];
1125
        const int z0= temp[4*0+i] + temp[4*2+i];
1126
        const int z1= temp[4*0+i] - temp[4*2+i];
1127
        const int z2= temp[4*1+i] - temp[4*3+i];
1128
        const int z3= temp[4*1+i] + temp[4*3+i];
1129

    
1130
        block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1131
        block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1132
        block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1133
        block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1134
    }
1135
}
1136

    
1137
/**
1138
 * dct tranforms the 16 dc values.
1139
 * @param qp quantization parameter ??? FIXME
1140
 */
1141
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1142
//    const int qmul= dequant_coeff[qp][0];
1143
    int i;
1144
    int temp[16]; //FIXME check if this is a good idea
1145
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1146
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1147

    
1148
    for(i=0; i<4; i++){
1149
        const int offset= y_offset[i];
1150
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1151
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1152
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1153
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1154

    
1155
        temp[4*i+0]= z0+z3;
1156
        temp[4*i+1]= z1+z2;
1157
        temp[4*i+2]= z1-z2;
1158
        temp[4*i+3]= z0-z3;
1159
    }
1160

    
1161
    for(i=0; i<4; i++){
1162
        const int offset= x_offset[i];
1163
        const int z0= temp[4*0+i] + temp[4*2+i];
1164
        const int z1= temp[4*0+i] - temp[4*2+i];
1165
        const int z2= temp[4*1+i] - temp[4*3+i];
1166
        const int z3= temp[4*1+i] + temp[4*3+i];
1167

    
1168
        block[stride*0 +offset]= (z0 + z3)>>1;
1169
        block[stride*2 +offset]= (z1 + z2)>>1;
1170
        block[stride*8 +offset]= (z1 - z2)>>1;
1171
        block[stride*10+offset]= (z0 - z3)>>1;
1172
    }
1173
}
1174
#undef xStride
1175
#undef stride
1176

    
1177
/**
 * Inverse transforms the 2x2 chroma DC values and dequantizes them, in place.
 * The four values live at block[0], block[16], block[32] and block[48].
 * @param qp quantization parameter, selects the scale dequant_coeff[qp][0]
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
    const int qmul= dequant_coeff[qp][0];
    const int row_step= 16*2;
    const int col_step= 16;
    DCTELEM * const top= block;
    DCTELEM * const bot= block + row_step;
    const int top_sum = top[0] + top[col_step];
    const int top_diff= top[0] - top[col_step];
    const int bot_sum = bot[0] + bot[col_step];
    const int bot_diff= bot[0] - bot[col_step];

    top[0]       = ((top_sum  + bot_sum )*qmul)>>1;
    top[col_step]= ((top_diff + bot_diff)*qmul)>>1;
    bot[0]       = ((top_sum  - bot_sum )*qmul)>>1;
    bot[col_step]= ((top_diff - bot_diff)*qmul)>>1;
}
1198

    
1199
/**
 * Forward transforms the 2x2 chroma DC values in place (no scaling).
 * The four values live at block[0], block[16], block[32] and block[48].
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int row_step= 16*2;
    const int col_step= 16;
    DCTELEM * const top= block;
    DCTELEM * const bot= block + row_step;
    const int top_sum = top[0] + top[col_step];
    const int top_diff= top[0] - top[col_step];
    const int bot_sum = bot[0] + bot[col_step];
    const int bot_diff= bot[0] - bot[col_step];

    top[0]       = top_sum  + bot_sum;
    top[col_step]= top_diff + bot_diff;
    bot[0]       = top_sum  - bot_sum;
    bot[col_step]= top_diff - bot_diff;
}
1219

    
1220
/**
1221
 * gets the chroma qp.
1222
 */
1223
static inline int get_chroma_qp(H264Context *h, int qscale){
1224
    
1225
    return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
1226
}
1227

    
1228

    
1229
/**
1230
 *
1231
 */
1232
static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
1233
    int i;
1234
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
1235

    
1236
    block[0] += 32;
1237
#if 1
1238
    for(i=0; i<4; i++){
1239
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1240
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1241
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1242
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1243

    
1244
        block[i + 4*0]= z0 + z3;
1245
        block[i + 4*1]= z1 + z2;
1246
        block[i + 4*2]= z1 - z2;
1247
        block[i + 4*3]= z0 - z3;
1248
    }
1249

    
1250
    for(i=0; i<4; i++){
1251
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1252
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1253
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1254
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1255

    
1256
        dst[0 + i*stride]= cm[ dst[0 + i*stride] + ((z0 + z3) >> 6) ];
1257
        dst[1 + i*stride]= cm[ dst[1 + i*stride] + ((z1 + z2) >> 6) ];
1258
        dst[2 + i*stride]= cm[ dst[2 + i*stride] + ((z1 - z2) >> 6) ];
1259
        dst[3 + i*stride]= cm[ dst[3 + i*stride] + ((z0 - z3) >> 6) ];
1260
    }
1261
#else
1262
    for(i=0; i<4; i++){
1263
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1264
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1265
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1266
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1267

    
1268
        block[0 + 4*i]= z0 + z3;
1269
        block[1 + 4*i]= z1 + z2;
1270
        block[2 + 4*i]= z1 - z2;
1271
        block[3 + 4*i]= z0 - z3;
1272
    }
1273

    
1274
    for(i=0; i<4; i++){
1275
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1276
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1277
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1278
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1279

    
1280
        dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
1281
        dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
1282
        dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
1283
        dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
1284
    }
1285
#endif
1286
}
1287

    
1288
/**
 * Forward transforms the difference of two 4x4 pixel areas.
 * @param block receives the 16 coefficients (4 per row)
 * @param src1 top left pixel of the first area
 * @param src2 top left pixel of the second area, subtracted from the first
 * @param stride distance in bytes between two rows, used for both areas
 */
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
    int i;
    //FIXME try int temp instead of block

    /* horizontal pass on the pixel differences, one row per iteration */
    for(i=0; i<4; i++){
        const uint8_t * const a= src1 + i*stride;
        const uint8_t * const b= src2 + i*stride;
        DCTELEM * const row= block + 4*i;
        const int d0= a[0] - b[0];
        const int d1= a[1] - b[1];
        const int d2= a[2] - b[2];
        const int d3= a[3] - b[3];
        const int outer_sum = d0 + d3;
        const int outer_diff= d0 - d3;
        const int inner_sum = d1 + d2;
        const int inner_diff= d1 - d2;

        row[0]=   outer_sum  +   inner_sum;
        row[1]= 2*outer_diff +   inner_diff;
        row[2]=   outer_sum  -   inner_sum;
        row[3]=   outer_diff - 2*inner_diff;
    }

    /* vertical pass, one column per iteration */
    for(i=0; i<4; i++){
        DCTELEM * const col= block + i;
        const int outer_sum = col[0*4] + col[3*4];
        const int outer_diff= col[0*4] - col[3*4];
        const int inner_sum = col[1*4] + col[2*4];
        const int inner_diff= col[1*4] - col[2*4];

        col[0*4]=   outer_sum  +   inner_sum;
        col[1*4]= 2*outer_diff +   inner_diff;
        col[2*4]=   outer_sum  -   inner_sum;
        col[3*4]=   outer_diff - 2*inner_diff;
    }
}
1320

    
1321
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, iam not sure, its very close
1322
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1323
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1324
    int i;
1325
    const int * const quant_table= quant_coeff[qscale];
1326
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1327
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1328
    const unsigned int threshold2= (threshold1<<1);
1329
    int last_non_zero;
1330

    
1331
    if(seperate_dc){
1332
        if(qscale<=18){
1333
            //avoid overflows
1334
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1335
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1336
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1337

    
1338
            int level= block[0]*quant_coeff[qscale+18][0];
1339
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1340
                if(level>0){
1341
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
1342
                    block[0]= level;
1343
                }else{
1344
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
1345
                    block[0]= -level;
1346
                }
1347
//                last_non_zero = i;
1348
            }else{
1349
                block[0]=0;
1350
            }
1351
        }else{
1352
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1353
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1354
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1355

    
1356
            int level= block[0]*quant_table[0];
1357
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1358
                if(level>0){
1359
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
1360
                    block[0]= level;
1361
                }else{
1362
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
1363
                    block[0]= -level;
1364
                }
1365
//                last_non_zero = i;
1366
            }else{
1367
                block[0]=0;
1368
            }
1369
        }
1370
        last_non_zero= 0;
1371
        i=1;
1372
    }else{
1373
        last_non_zero= -1;
1374
        i=0;
1375
    }
1376

    
1377
    for(; i<16; i++){
1378
        const int j= scantable[i];
1379
        int level= block[j]*quant_table[j];
1380

    
1381
//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1382
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1383
        if(((unsigned)(level+threshold1))>threshold2){
1384
            if(level>0){
1385
                level= (bias + level)>>QUANT_SHIFT;
1386
                block[j]= level;
1387
            }else{
1388
                level= (bias - level)>>QUANT_SHIFT;
1389
                block[j]= -level;
1390
            }
1391
            last_non_zero = i;
1392
        }else{
1393
            block[j]=0;
1394
        }
1395
    }
1396

    
1397
    return last_non_zero;
1398
}
1399

    
1400
/**
 * 4x4 vertical prediction: every row of the block becomes a copy of the 4
 * pixels directly above it.
 * @param src top left pixel of the block; rows are accessed as 32 bit words
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row= *(uint32_t*)(src - stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= top_row;
}
1407

    
1408
/**
 * 4x4 horizontal prediction: every row of the block is filled with the pixel
 * directly left of it.
 * @param src top left pixel of the block; rows are written as 32 bit words
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++){
        /* unsigned constant: as int the product overflows for pixels >= 128 */
        *(uint32_t*)(src + y*stride)= src[-1 + y*stride]*0x01010101U;
    }
}
1414

    
1415
/**
 * 4x4 DC prediction: the block is filled with the rounded average of the 4
 * pixels above and the 4 pixels left of it.
 * @param src top left pixel of the block; rows are written as 32 bit words
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    /* unsigned multiply: as int the product overflows for dc >= 128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1424

    
1425
/**
 * 4x4 DC prediction from the left edge only: the block is filled with the
 * rounded average of the 4 pixels left of it.
 * @param src top left pixel of the block; rows are written as 32 bit words
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    /* unsigned multiply: as int the product overflows for dc >= 128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1433

    
1434
/**
 * 4x4 DC prediction from the top edge only: the block is filled with the
 * rounded average of the 4 pixels above it.
 * @param src top left pixel of the block; rows are written as 32 bit words
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    /* unsigned multiply: as int the product overflows for dc >= 128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1442

    
1443
/**
 * 4x4 DC prediction without any neighbours: the block is filled with 128.
 * @param src top left pixel of the block; rows are written as 32 bit words
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t fill= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1449

    
1450

    
1451
/*
 * Edge loaders for the directional 4x4 predictors. Each macro expands to four
 * const int declarations, so it has to be placed among the declarations of the
 * using function and is written without a trailing semicolon. They expect
 * `src`, `stride` and (for the top right edge) `topright` to be in scope.
 */

/* t4..t7: the 4 pixels starting at topright */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

/* l0..l3: the pixel left of each of the 4 rows */
#define LOAD_LEFT_EDGE\
    const int l0= src[0*stride - 1];\
    const int l1= src[1*stride - 1];\
    const int l2= src[2*stride - 1];\
    const int l3= src[3*stride - 1];

/* t0..t3: the 4 pixels of the row above the block */
#define LOAD_TOP_EDGE\
    const int t0= src[0 - stride];\
    const int t1= src[1 - stride];\
    const int t2= src[2 - stride];\
    const int t3= src[3 - stride];
1468

    
1469
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1470
    const int lt= src[-1-1*stride];
1471
    LOAD_TOP_EDGE
1472
    LOAD_LEFT_EDGE
1473

    
1474
    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 
1475
    src[0+2*stride]=
1476
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 
1477
    src[0+1*stride]=
1478
    src[1+2*stride]=
1479
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 
1480
    src[0+0*stride]=
1481
    src[1+1*stride]=
1482
    src[2+2*stride]=
1483
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 
1484
    src[1+0*stride]=
1485
    src[2+1*stride]=
1486
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1487
    src[2+0*stride]=
1488
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1489
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1490
}
1491

    
1492
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1493
    LOAD_TOP_EDGE    
1494
    LOAD_TOP_RIGHT_EDGE    
1495
//    LOAD_LEFT_EDGE    
1496

    
1497
    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1498
    src[1+0*stride]=
1499
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1500
    src[2+0*stride]=
1501
    src[1+1*stride]=
1502
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1503
    src[3+0*stride]=
1504
    src[2+1*stride]=
1505
    src[1+2*stride]=
1506
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1507
    src[3+1*stride]=
1508
    src[2+2*stride]=
1509
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1510
    src[3+2*stride]=
1511
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1512
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
1513
}
1514

    
1515
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1516
    const int lt= src[-1-1*stride];
1517
    LOAD_TOP_EDGE    
1518
    LOAD_LEFT_EDGE    
1519
    const __attribute__((unused)) int unu= l3;
1520

    
1521
    src[0+0*stride]=
1522
    src[1+2*stride]=(lt + t0 + 1)>>1;
1523
    src[1+0*stride]=
1524
    src[2+2*stride]=(t0 + t1 + 1)>>1;
1525
    src[2+0*stride]=
1526
    src[3+2*stride]=(t1 + t2 + 1)>>1;
1527
    src[3+0*stride]=(t2 + t3 + 1)>>1;
1528
    src[0+1*stride]=
1529
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1530
    src[1+1*stride]=
1531
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1532
    src[2+1*stride]=
1533
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1534
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1535
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1536
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1537
}
1538

    
1539
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1540
    LOAD_TOP_EDGE    
1541
    LOAD_TOP_RIGHT_EDGE    
1542
    const __attribute__((unused)) int unu= t7;
1543

    
1544
    src[0+0*stride]=(t0 + t1 + 1)>>1;
1545
    src[1+0*stride]=
1546
    src[0+2*stride]=(t1 + t2 + 1)>>1;
1547
    src[2+0*stride]=
1548
    src[1+2*stride]=(t2 + t3 + 1)>>1;
1549
    src[3+0*stride]=
1550
    src[2+2*stride]=(t3 + t4+ 1)>>1;
1551
    src[3+2*stride]=(t4 + t5+ 1)>>1;
1552
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1553
    src[1+1*stride]=
1554
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1555
    src[2+1*stride]=
1556
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1557
    src[3+1*stride]=
1558
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1559
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
1560
}
1561

    
1562
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
1563
    LOAD_LEFT_EDGE    
1564

    
1565
    src[0+0*stride]=(l0 + l1 + 1)>>1;
1566
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1567
    src[2+0*stride]=
1568
    src[0+1*stride]=(l1 + l2 + 1)>>1;
1569
    src[3+0*stride]=
1570
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1571
    src[2+1*stride]=
1572
    src[0+2*stride]=(l2 + l3 + 1)>>1;
1573
    src[3+1*stride]=
1574
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
1575
    src[3+2*stride]=
1576
    src[1+3*stride]=
1577
    src[0+3*stride]=
1578
    src[2+2*stride]=
1579
    src[2+3*stride]=
1580
    src[3+3*stride]=l3;
1581
}
1582
    
1583
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
1584
    const int lt= src[-1-1*stride];
1585
    LOAD_TOP_EDGE    
1586
    LOAD_LEFT_EDGE    
1587
    const __attribute__((unused)) int unu= t3;
1588

    
1589
    src[0+0*stride]=
1590
    src[2+1*stride]=(lt + l0 + 1)>>1;
1591
    src[1+0*stride]=
1592
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
1593
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
1594
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1595
    src[0+1*stride]=
1596
    src[2+2*stride]=(l0 + l1 + 1)>>1;
1597
    src[1+1*stride]=
1598
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1599
    src[0+2*stride]=
1600
    src[2+3*stride]=(l1 + l2+ 1)>>1;
1601
    src[1+2*stride]=
1602
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1603
    src[0+3*stride]=(l2 + l3 + 1)>>1;
1604
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1605
}
1606

    
1607
/**
 * 16x16 intra prediction, vertical mode: every row of the block becomes a
 * copy of the 16 pixels directly above the block (src[-stride .. -stride+15]).
 *
 * The copy is done with memcpy instead of uint32_t-casted accesses so that
 * no alignment of src/stride is required and no uint8_t data is accessed
 * through an incompatible pointer type.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    uint8_t top[16];
    int y;

    /* snapshot the row above first, exactly like the original read-then-write order */
    memcpy(top, src - stride, sizeof(top));

    for(y=0; y<16; y++)
        memcpy(src + y*stride, top, sizeof(top));
}
1621

    
1622
/**
 * 16x16 intra prediction, horizontal mode: every row of the block is filled
 * with the pixel immediately to its left (src[-1 + y*stride]).
 *
 * memset replaces the former "pixel*0x01010101" word stores: that product is
 * evaluated in signed int and overflows for pixel values >= 128, and the
 * uint32_t casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<16; y++){
        uint8_t * const row= src + y*stride;
        memset(row, row[-1], 16);
    }
}
1632

    
1633
/**
 * 16x16 intra prediction, DC mode: fills the block with the rounded mean of
 * the 16 pixels left of the block and the 16 pixels above it,
 * (sum + 16) >> 5.
 *
 * memset replaces the former "0x01010101*mean" word stores: that product is
 * evaluated in signed int and overflows for means >= 128, and the uint32_t
 * casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    int dc;

    for(i=0; i<16; i++)
        sum+= src[-1+i*stride] + src[i-stride];

    dc= (sum + 16)>>5;

    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
1653

    
1654
/**
 * 16x16 intra prediction, left-only DC mode: fills the block with the
 * rounded mean of the 16 pixels left of the block, (sum + 8) >> 4.
 * The row above the block is not read.
 *
 * memset replaces the former "0x01010101*mean" word stores: that product is
 * evaluated in signed int and overflows for means >= 128, and the uint32_t
 * casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    int dc;

    for(i=0; i<16; i++)
        sum+= src[-1+i*stride];

    dc= (sum + 8)>>4;

    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
1670

    
1671
/**
 * 16x16 intra prediction, top-only DC mode: fills the block with the
 * rounded mean of the 16 pixels above the block, (sum + 8) >> 4.
 * The column left of the block is not read.
 *
 * memset replaces the former "0x01010101*mean" word stores: that product is
 * evaluated in signed int and overflows for means >= 128, and the uint32_t
 * casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    int dc;

    for(i=0; i<16; i++)
        sum+= src[i-stride];

    dc= (sum + 8)>>4;

    for(i=0; i<16; i++)
        memset(src + i*stride, dc, 16);
}
1686

    
1687
/**
 * 16x16 intra prediction, fixed DC mode: fills the whole block with the
 * constant 128 without reading any neighbouring pixel.
 *
 * Uses memset rather than uint32_t-casted stores so that rows need not be
 * 4-byte aligned, consistent with the other 16x16 DC predictors.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<16; y++)
        memset(src + y*stride, 128, 16);
}
1697

    
1698
static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
1699
  int i, j, k;
1700
  int a;
1701
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1702
  const uint8_t * const src0 = src+7-stride;
1703
  const uint8_t *src1 = src+8*stride-1;
1704
  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
1705
  int H = src0[1] - src0[-1];
1706
  int V = src1[0] - src2[ 0];
1707
  for(k=2; k<=8; ++k) {
1708
    src1 += stride; src2 -= stride;
1709
    H += k*(src0[k] - src0[-k]);
1710
    V += k*(src1[0] - src2[ 0]);
1711
  }
1712
  if(svq3){
1713
    H = ( 5*(H/4) ) / 16;
1714
    V = ( 5*(V/4) ) / 16;
1715

    
1716
    /* required for 100% accuracy */
1717
    i = H; H = V; V = i;
1718
  }else{
1719
    H = ( 5*H+32 ) >> 6;
1720
    V = ( 5*V+32 ) >> 6;
1721
  }
1722

    
1723
  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
1724
  for(j=16; j>0; --j) {
1725
    int b = a;
1726
    a += V;
1727
    for(i=-16; i<0; i+=4) {
1728
      src[16+i] = cm[ (b    ) >> 5 ];
1729
      src[17+i] = cm[ (b+  H) >> 5 ];
1730
      src[18+i] = cm[ (b+2*H) >> 5 ];
1731
      src[19+i] = cm[ (b+3*H) >> 5 ];
1732
      b += 4*H;
1733
    }
1734
    src += stride;
1735
  }
1736
}
1737

    
1738
/**
 * 16x16 intra prediction, plane mode.
 * Forwards to pred16x16_plane_compat_c() with the svq3 flag cleared.
 *
 * @param src    top-left pixel of the 16x16 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    const int svq3_scaling= 0;

    pred16x16_plane_compat_c(src, stride, svq3_scaling);
}
1741

    
1742
/**
 * 8x8 intra prediction, vertical mode: every row of the block becomes a
 * copy of the 8 pixels directly above the block (src[-stride .. -stride+7]).
 *
 * The copy is done with memcpy instead of uint32_t-casted accesses so that
 * no alignment of src/stride is required and no uint8_t data is accessed
 * through an incompatible pointer type.
 *
 * @param src    top-left pixel of the 8x8 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    uint8_t top[8];
    int y;

    /* snapshot the row above first, exactly like the original read-then-write order */
    memcpy(top, src - stride, sizeof(top));

    for(y=0; y<8; y++)
        memcpy(src + y*stride, top, sizeof(top));
}
1752

    
1753
/**
 * 8x8 intra prediction, horizontal mode: every row of the block is filled
 * with the pixel immediately to its left (src[-1 + y*stride]).
 *
 * memset replaces the former "pixel*0x01010101" word stores: that product is
 * evaluated in signed int and overflows for pixel values >= 128, and the
 * uint32_t casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 8x8 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<8; y++){
        uint8_t * const row= src + y*stride;
        memset(row, row[-1], 8);
    }
}
1761

    
1762
/**
 * 8x8 intra prediction, fixed DC mode: fills the whole block with the
 * constant 128 without reading any neighbouring pixel.
 *
 * The two identical half-block loops are merged and the uint32_t-casted
 * stores are replaced by memset so that rows need not be 4-byte aligned.
 *
 * @param src    top-left pixel of the 8x8 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int y;

    for(y=0; y<8; y++)
        memset(src + y*stride, 128, 8);
}
1774

    
1775
/**
 * 8x8 intra prediction, left-only DC mode.
 * Rows 0-3 are filled with the rounded mean of the four pixels left of
 * them, rows 4-7 with the rounded mean of the four pixels left of those
 * rows; each mean is (sum + 2) >> 2. The row above the block is not read.
 *
 * memset replaces the former "0x01010101*mean" word stores: that product is
 * evaluated in signed int and overflows for means >= 128, and the uint32_t
 * casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 8x8 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum_upper=0, sum_lower=0;
    int dc_upper, dc_lower;

    for(i=0; i<4; i++){
        sum_upper+= src[-1+ i   *stride];
        sum_lower+= src[-1+(i+4)*stride];
    }
    dc_upper= (sum_upper + 2)>>2;
    dc_lower= (sum_lower + 2)>>2;

    for(i=0; i<4; i++){
        memset(src +  i   *stride, dc_upper, 8);
        memset(src + (i+4)*stride, dc_lower, 8);
    }
}
1796

    
1797
/**
 * 8x8 intra prediction, top-only DC mode.
 * Columns 0-3 of every row are filled with the rounded mean of the four
 * pixels above them, columns 4-7 with the rounded mean of the four pixels
 * above those columns; each mean is (sum + 2) >> 2. The column left of the
 * block is not read.
 *
 * memset replaces the former "0x01010101*mean" word stores: that product is
 * evaluated in signed int and overflows for means >= 128, and the uint32_t
 * casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 8x8 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum_left=0, sum_right=0;
    int dc_left, dc_right;

    for(i=0; i<4; i++){
        sum_left += src[  i-stride];
        sum_right+= src[4+i-stride];
    }
    dc_left = (sum_left  + 2)>>2;
    dc_right= (sum_right + 2)>>2;

    for(i=0; i<8; i++){
        uint8_t * const row= src + i*stride;
        memset(row    , dc_left , 4);
        memset(row + 4, dc_right, 4);
    }
}
1818

    
1819

    
1820
/**
 * 8x8 intra prediction, DC mode, computed per 4x4 quadrant:
 *   - top-left:     mean of the 4 pixels above and the 4 pixels left of it,
 *                   (sum + 4) >> 3
 *   - top-right:    mean of the 4 pixels above it, (sum + 2) >> 2
 *   - bottom-left:  mean of the 4 pixels left of it, (sum + 2) >> 2
 *   - bottom-right: mean of the 4 pixels above the right half and the
 *                   4 pixels left of the lower half, (sum + 4) >> 3
 *
 * memset replaces the former "0x01010101*mean" word stores: that product is
 * evaluated in signed int and overflows for means >= 128, and the uint32_t
 * casts required 4-byte aligned rows.
 *
 * @param src    top-left pixel of the 8x8 block to fill
 * @param stride distance in bytes between two rows of src
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int top_l=0, top_r=0, left_u=0, left_d=0;
    int dc_tl, dc_tr, dc_bl, dc_br;

    for(i=0; i<4; i++){
        top_l += src[   i   -stride];
        top_r += src[ 4+i   -stride];
        left_u+= src[-1+ i   *stride];
        left_d+= src[-1+(i+4)*stride];
    }
    dc_tl= (top_l + left_u + 4)>>3;
    dc_tr= (top_r          + 2)>>2;
    dc_bl= (left_d         + 2)>>2;
    dc_br= (top_r + left_d + 4)>>3;

    for(i=0; i<4; i++){
        uint8_t * const upper= src +  i   *stride;
        uint8_t * const lower= src + (i+4)*stride;

        memset(upper    , dc_tl, 4);
        memset(upper + 4, dc_tr, 4);
        memset(lower    , dc_bl, 4);
        memset(lower + 4, dc_br, 4);
    }
}
1844

    
1845
static void pred8x8_plane_c(uint8_t *src, int stride){
1846
  int j, k;
1847
  int a;
1848
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1849
  const uint8_t * const src0 = src+3-stride;
1850
  const uint8_t *src1 = src+4*stride-1;
1851
  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
1852
  int H = src0[1] - src0[-1];
1853
  int V = src1[0] - src2[ 0];
1854
  for(k=2; k<=4; ++k) {
1855
    src1 += stride; src2 -= stride;
1856
    H += k*(src0[k] - src0[-k]);
1857
    V += k*(src1[0] - src2[ 0]);
1858
  }
1859
  H = ( 17*H+16 ) >> 5;
1860
  V = ( 17*V+16 ) >> 5;
1861

    
1862
  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
1863
  for(j=8; j>0; --j) {
1864
    int b = a;
1865
    a += V;
1866
    src[0] = cm[ (b    ) >> 5 ];
1867
    src[1] = cm[ (b+  H) >> 5 ];
1868
    src[2] = cm[ (b+2*H) >> 5 ];
1869
    src[3] = cm[ (b+3*H) >> 5 ];
1870
    src[4] = cm[ (b+4*H) >> 5 ];
1871
    src[5] = cm[ (b+5*H) >> 5 ];
1872
    src[6] = cm[ (b+6*H) >> 5 ];
1873
    src[7] = cm[ (b+7*H) >> 5 ];
1874
    src += stride;
1875
  }
1876
}
1877

    
1878
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1879
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1880
                           int src_x_offset, int src_y_offset,
1881
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1882
    MpegEncContext * const s = &h->s;
1883
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1884
    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1885
    const int luma_xy= (mx&3) + ((my&3)<<2);
1886
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
1887
    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
1888
    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
1889
    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
1890
    int extra_height= extra_width;
1891
    int emu=0;
1892
    const int full_mx= mx>>2;
1893
    const int full_my= my>>2;
1894
    
1895
    assert(pic->data[0]);
1896
    
1897
    if(mx&7) extra_width -= 3;
1898
    if(my&7) extra_height -= 3;
1899
    
1900
    if(   full_mx < 0-extra_width 
1901
       || full_my < 0-extra_height 
1902
       || full_mx + 16/*FIXME*/ > s->width + extra_width 
1903
       || full_my + 16/*FIXME*/ > s->height + extra_height){
1904
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
1905
            src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
1906
        emu=1;
1907
    }
1908
    
1909
    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
1910
    if(!square){
1911
        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
1912
    }
1913
    
1914
    if(s->flags&CODEC_FLAG_GRAY) return;
1915
    
1916
    if(emu){
1917
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1918
            src_cb= s->edge_emu_buffer;
1919
    }
1920
    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
1921

    
1922
    if(emu){
1923
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1924
            src_cr= s->edge_emu_buffer;
1925
    }
1926
    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
1927
}
1928

    
1929
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1930
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1931
                           int x_offset, int y_offset,
1932
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1933
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1934
                           int list0, int list1){
1935
    MpegEncContext * const s = &h->s;
1936
    qpel_mc_func *qpix_op=  qpix_put;
1937
    h264_chroma_mc_func chroma_op= chroma_put;
1938
    
1939
    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
1940
    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
1941
    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
1942
    x_offset += 8*s->mb_x;
1943
    y_offset += 8*s->mb_y;
1944
    
1945
    if(list0){
1946
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1947
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1948
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1949
                           qpix_op, chroma_op);
1950

    
1951
        qpix_op=  qpix_avg;
1952
        chroma_op= chroma_avg;
1953
    }
1954

    
1955
    if(list1){
1956
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1957
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1958
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1959
                           qpix_op, chroma_op);
1960
    }
1961
}
1962

    
1963
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1964
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1965
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
1966
    MpegEncContext * const s = &h->s;
1967
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1968
    const int mb_type= s->current_picture.mb_type[mb_xy];
1969
    
1970
    assert(IS_INTER(mb_type));
1971
    
1972
    if(IS_16X16(mb_type)){
1973
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1974
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1975
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1976
    }else if(IS_16X8(mb_type)){
1977
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1978
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1979
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1980
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1981
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1982
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1983
    }else if(IS_8X16(mb_type)){
1984
        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
1985
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1986
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1987
        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
1988
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1989
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1990
    }else{
1991
        int i;
1992
        
1993
        assert(IS_8X8(mb_type));
1994

    
1995
        for(i=0; i<4; i++){
1996
            const int sub_mb_type= h->sub_mb_type[i];
1997
            const int n= 4*i;
1998
            int x_offset= (i&1)<<2;
1999
            int y_offset= (i&2)<<1;
2000

    
2001
            if(IS_SUB_8X8(sub_mb_type)){
2002
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2003
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2004
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2005
            }else if(IS_SUB_8X4(sub_mb_type)){
2006
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2007
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2008
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2009
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2010
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2011
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2012
            }else if(IS_SUB_4X8(sub_mb_type)){
2013
                mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2014
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2015
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2016
                mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2017
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2018
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2019
            }else{
2020
                int j;
2021
                assert(IS_SUB_4X4(sub_mb_type));
2022
                for(j=0; j<4; j++){
2023
                    int sub_x_offset= x_offset + 2*(j&1);
2024
                    int sub_y_offset= y_offset +   (j&2);
2025
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2026
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2027
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2028
                }
2029
            }
2030
        }
2031
    }
2032
}
2033

    
2034
/**
 * Builds the VLC tables used by this file (coefficient tokens, total zeros
 * and run tables). The work is done on the first call only; a function-local
 * static flag turns every later call into a no-op.
 *
 * @param h decoder context (not read here)
 */
static void decode_init_vlc(H264Context *h){
    static int done = 0;
    int i;

    if (done)
        return;
    done = 1;

    init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
             &chroma_dc_coeff_token_len [0], 1, 1,
             &chroma_dc_coeff_token_bits[0], 1, 1);

    for(i=0; i<4; i++)
        init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                 &coeff_token_len [i][0], 1, 1,
                 &coeff_token_bits[i][0], 1, 1);

    for(i=0; i<3; i++)
        init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                 &chroma_dc_total_zeros_len [i][0], 1, 1,
                 &chroma_dc_total_zeros_bits[i][0], 1, 1);

    for(i=0; i<15; i++)
        init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
                 &total_zeros_len [i][0], 1, 1,
                 &total_zeros_bits[i][0], 1, 1);

    /* run tables 0..5 have 7 entries; row 6 gets its own, larger table */
    for(i=0; i<6; i++)
        init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
                 &run_len [i][0], 1, 1,
                 &run_bits[i][0], 1, 1);

    init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
             &run_len [6][0], 1, 1,
             &run_bits[6][0], 1, 1);
}
2072

    
2073
/**
2074
 * Sets the intra prediction function pointers.
2075
 */
2076
static void init_pred_ptrs(H264Context *h){
2077
//    MpegEncContext * const s = &h->s;
2078

    
2079
    h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
2080
    h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
2081
    h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
2082
    h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2083
    h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2084
    h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
2085
    h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
2086
    h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
2087
    h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
2088
    h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
2089
    h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
2090
    h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
2091

    
2092
    h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
2093
    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
2094
    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
2095
    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
2096
    h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2097
    h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2098
    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2099

    
2100
    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
2101
    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
2102
    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
2103
    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
2104
    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2105
    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2106
    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2107
}
2108

    
2109
/**
 * Releases the per-macroblock tables owned by the context (the ones
 * allocated in alloc_tables()) and clears h->slice_table, which points into
 * slice_table_base. Each pointer is released through av_freep().
 */
static void free_tables(H264Context *h){
    /* block index lookup tables */
    av_freep(&h->mb2b8_xy);
    av_freep(&h->mb2b_xy);

    /* slice_table is an offset into slice_table_base, so it dies with it */
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;

    av_freep(&h->non_zero_count);
    av_freep(&h->intra4x4_pred_mode);
}
2118

    
2119
/**
2120
 * allocates tables.
2121
 * needs widzh/height
2122
 */
2123
static int alloc_tables(H264Context *h){
2124
    MpegEncContext * const s = &h->s;
2125
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2126
    int x,y;
2127

    
2128
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2129
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2130
    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
2131

    
2132
    memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
2133
    h->slice_table= h->slice_table_base + s->mb_stride + 1;
2134

    
2135
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint16_t));
2136
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
2137
    for(y=0; y<s->mb_height; y++){
2138
        for(x=0; x<s->mb_width; x++){
2139
            const int mb_xy= x + y*s->mb_stride;
2140
            const int b_xy = 4*x + 4*y*h->b_stride;
2141
            const int b8_xy= 2*x + 2*y*h->b8_stride;
2142
        
2143
            h->mb2b_xy [mb_xy]= b_xy;
2144
            h->mb2b8_xy[mb_xy]= b8_xy;
2145
        }
2146
    }
2147
    
2148
    return 0;
2149
fail:
2150
    free_tables(h);
2151
    return -1;
2152
}
2153

    
2154
/**
 * Initialisation shared by the users of H264Context: copies width, height
 * and codec id from s->avctx into the MpegEncContext, installs the intra
 * prediction functions and sets s->decode.
 * s->avctx must already be set by the caller.
 */
static void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx = s->avctx;

    init_pred_ptrs(h);

    s->codec_id= avctx->codec->id;
    s->width   = avctx->width;
    s->height  = avctx->height;

    s->decode=1; //FIXME
}
2165

    
2166
/**
 * Decoder init callback: binds the codec context to the decoder state,
 * runs the common initialisation, sets the decoder defaults and builds the
 * VLC tables.
 *
 * @param avctx codec context; avctx->priv_data is used as the H264Context
 *              and avctx->pix_fmt is set to PIX_FMT_YUV420P
 * @return always 0
 */
static int decode_init(AVCodecContext *avctx){
    H264Context * const h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    /* common_init() reads s->avctx, so it has to be set first */
    s->avctx = avctx;
    common_init(h);

    // set defaults
    avctx->pix_fmt= PIX_FMT_YUV420P;
    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;
    s->progressive_sequence=1;
    s->low_delay= 1;

    decode_init_vlc(h);

    return 0;
}
2186

    
2187
/**
 * Per-frame setup: starts the frame in the shared MPV and error-resilience
 * code, resets h->mmco_index and recomputes the block offset tables, which
 * depend on the line sizes of the current frame.
 */
static void frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    MPV_frame_start(s, s->avctx);
    ff_er_frame_start(s);
    h->mmco_index=0;

    assert(s->linesize && s->uvlinesize);

    /* scan8[] positions are relative to scan8[0]; the low 3 bits give the
       column and the remaining bits the row of a block */
    for(i=0; i<16; i++){
        const int rel= scan8[i] - scan8[0];
        const int col= rel&7;
        const int row= rel>>3;

        h->block_offset[i]=           4*col + 4*s->linesize  *row;
        h->chroma_subblock_offset[i]= 2*col + 2*s->uvlinesize*row;
    }

    /* entries 16..19 and 20..23 share the same offsets, computed with the chroma line size */
    for(i=0; i<4; i++){
        const int rel= scan8[i] - scan8[0];
        const int chroma_off= 4*(rel&7) + 4*s->uvlinesize*(rel>>3);

        h->block_offset[20+i]= chroma_off;
        h->block_offset[16+i]= chroma_off;
    }
}
2208

    
2209
/**
 * Reconstructs the current macroblock into s->current_picture:
 * intra prediction or motion compensation first, then the residual is
 * added block by block (H.264 or SVQ3 inverse transform, selected by
 * s->codec_id). Does nothing when s->decode is zero.
 *
 * Fixes relative to the previous revision:
 *  - for the odd macroblock row of a field-decoded macroblock the chroma
 *    destinations are moved back by 7 chroma lines (uvlinesize); the luma
 *    line size was used before, which is only right if both sizes match;
 *  - the replicated top-right value is built in unsigned arithmetic;
 *    "pixel*0x01010101" overflowed signed int for pixels >= 128.
 */
static void hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;

    if(!s->decode)
        return;

    dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

    if (h->mb_field_decoding_flag) {
        /* field macroblock: rows are two picture lines apart */
        linesize = s->linesize * 2;
        uvlinesize = s->uvlinesize * 2;
        if(mb_y&1){ //FIXME move out of this func?
            /* start on the second line of the macroblock row above */
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
    } else {
        linesize = s->linesize;
        uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if(IS_INTRA(mb_type)){
        if(!(s->flags&CODEC_FLAG_GRAY)){
            h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
            h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
        }

        if(IS_INTRA4x4(mb_type)){
            if(!s->encoding){
                /* predict and reconstruct block by block, because each
                   block may be predicted from the previous ones */
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    uint8_t *topright= ptr + 4 - linesize;
                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    uint32_t tr;

                    if(!topright_avail){
                        /* substitute four copies of the last pixel above the block */
                        tr= ptr[3 - linesize]*0x01010101U;
                        topright= (uint8_t*) &tr;
                    }

                    h->pred4x4[ dir ](ptr, topright, linesize);
                    if(h->non_zero_count_cache[ scan8[i] ]){
                        if(s->codec_id == CODEC_ID_H264)
                            h264_add_idct_c(ptr, h->mb + i*16, linesize);
                        else
                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                    }
                }
            }
        }else{
            h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
            if(s->codec_id == CODEC_ID_H264)
                h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
            else
                svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
        }
    }else if(s->codec_id == CODEC_ID_H264){
        hl_motion(h, dest_y, dest_cb, dest_cr,
                  s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
                  s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
    }

    /* luma residual; intra 4x4 blocks were already handled above */
    if(!IS_INTRA4x4(mb_type)){
        if(s->codec_id == CODEC_ID_H264){
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, linesize);
                }
            }
        }else{
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }

    /* chroma residual: blocks 16..19 go to cb, blocks 20..23 to cr */
    if(!(s->flags&CODEC_FLAG_GRAY)){
        chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
        if(s->codec_id == CODEC_ID_H264){
            for(i=16; i<16+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
                }
            }
            for(i=20; i<20+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cr + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
                }
            }
        }else{
            for(i=16; i<16+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                }
            }
            for(i=20; i<20+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cr + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                }
            }
        }
    }
}
2336

    
2337
/**
 * Placeholder for CABAC macroblock decoding.
 * The body is empty: nothing is read and nothing is decoded.
 */
static void decode_mb_cabac(H264Context *h){
    /* not implemented */
}
2340

    
2341
/**
2342
 * fills the default_ref_list.
2343
 */
2344
static int fill_default_ref_list(H264Context *h){
2345
    MpegEncContext * const s = &h->s;
2346
    int i;
2347
    Picture sorted_short_ref[16];
2348
    
2349
    if(h->slice_type==B_TYPE){
2350
        int out_i;
2351
        int limit= -1;
2352

    
2353
        for(out_i=0; out_i<h->short_ref_count; out_i++){
2354
            int best_i=-1;
2355
            int best_poc=-1;
2356

    
2357
            for(i=0; i<h->short_ref_count; i++){
2358
                const int poc= h->short_ref[i]->poc;
2359
                if(poc > limit && poc < best_poc){
2360
                    best_poc= poc;
2361
                    best_i= i;
2362
                }
2363
            }
2364
            
2365
            assert(best_i != -1);
2366
            
2367
            limit= best_poc;
2368
            sorted_short_ref[out_i]= *h->short_ref[best_i];
2369
        }
2370
    }
2371

    
2372
    if(s->picture_structure == PICT_FRAME){
2373
        if(h->slice_type==B_TYPE){
2374
            const int current_poc= s->current_picture_ptr->poc;
2375
            int list;
2376

    
2377
            for(list=0; list<2; list++){
2378
                int index=0;
2379

    
2380
                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){
2381
                    const int i2= list ? h->short_ref_count - i - 1 : i;
2382
                    const int poc= sorted_short_ref[i2].poc;
2383
                    
2384
                    if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit
2385

    
2386
                    if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){
2387
                        h->default_ref_list[list][index  ]= sorted_short_ref[i2];
2388
                        h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num;
2389
                    }
2390
                }
2391

    
2392
                for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){
2393
                    if(h->long_ref[i]->reference != 3) continue;
2394

    
2395
                    h->default_ref_list[ list ][index  ]= *h->long_ref[i];
2396
                    h->default_ref_list[ list ][index++].pic_id= i;;
2397
                }
2398
                
2399
                if(h->long_ref_count > 1 && h->short_ref_count==0){
2400
                    Picture temp= h->default_ref_list[1][0];
2401
                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
2402
                    h->default_ref_list[1][0] = temp;
2403
                }
2404

    
2405
                if(index < h->ref_count[ list ])
2406
                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
2407
            }
2408
        }else{
2409
            int index=0;
2410
            for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
2411
                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
2412
                h->default_ref_list[0][index  ]= *h->short_ref[i];
2413
                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2414
            }
2415
            for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){
2416
                if(h->long_ref[i]->reference != 3) continue;
2417
                h->default_ref_list[0][index  ]= *h->long_ref[i];
2418
                h->default_ref_list[0][index++].pic_id= i;;
2419
            }
2420
            if(index < h->ref_count[0])
2421
                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2422
        }
2423
    }else{ //FIELD
2424
        if(h->slice_type==B_TYPE){
2425
        }else{
2426
            //FIXME second field balh
2427
        }
2428
    }
2429
    return 0;
2430
}
2431

    
2432
/**
 * Parses the reference picture list reordering syntax of a slice header.
 * Each active list is first copied from h->default_ref_list[] into
 * h->ref_list[]; if the reordering flag is set, the requested pictures are
 * then moved to the front of the list one by one.
 * Nothing is read for I and SI slices, and only list 0 is handled unless the
 * slice is a B slice.
 * @return 0 on success, -1 if the bitstream contains an invalid reordering
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list;
    
    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
    
    for(list=0; list<2; list++){
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        if(get_bits1(&s->gb)){
            int pred= h->curr_pic_num;
            int index;

            for(index=0; ; index++){
                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                int pic_id;
                int i;
                
                // The terminator has to be recognised before the overflow
                // check, otherwise a list that reorders all ref_count entries
                // and then ends (index == ref_count) is rejected.
                if(reordering_of_pic_nums_idc==3) 
                    break;
                
                if(index >= h->ref_count[list]){
                    fprintf(stderr, "reference count overflow\n");
                    return -1;
                }
                
                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        // short term picture, addressed by a difference to the previous prediction
                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;

                        if(abs_diff_pic_num >= h->max_pic_num){
                            fprintf(stderr, "abs_diff_pic_num overflow\n");
                            return -1;
                        }

                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else                                pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1;
                    
                        // search backwards through the not yet fixed part of the list
                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0)
                                break;
                        }
                    }else{
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx

                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1)
                                break;
                        }
                    }

                    if(i < index){
                        // not found: leave an empty entry and keep parsing
                        fprintf(stderr, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    }else if(i > index){
                        // move entry i to position index, shifting the entries in between up by one
                        Picture tmp= h->ref_list[list][i];
                        for(; i>index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        }
                        h->ref_list[list][index]= tmp;
                    }
                }else{
                    fprintf(stderr, "illegal reordering_of_pic_nums_idc\n");
                    return -1;
                }
            }
        }

        if(h->slice_type!=B_TYPE) break;
    }
    return 0;    
}
2505

    
2506
/**
 * Parses the prediction weight table of a slice header.
 * For every active reference of list 0 (and of list 1 for B slices) the
 * explicit luma / chroma weight and offset are read when the matching flag is
 * set; otherwise the inferred defaults (weight 1<<log2_weight_denom, offset 0)
 * are stored, so that no value from an earlier slice is left behind.
 * @return 0 on success, -1 if a weight denominator is out of range
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);

    // the denominators are used as shift counts below and are limited to 0..7
    if(h->luma_log2_weight_denom > 7 || h->chroma_log2_weight_denom > 7){
        fprintf(stderr, "log2_weight_denom out of range\n");
        return -1;
    }

    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;
            int j;
            
            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[list][i]= get_se_golomb(&s->gb);
                h->luma_offset[list][i]= get_se_golomb(&s->gb);
            }else{
                h->luma_weight[list][i]= 1<<h->luma_log2_weight_denom;
                h->luma_offset[list][i]= 0;
            }

            chroma_weight_flag= get_bits1(&s->gb);
            if(chroma_weight_flag){
                for(j=0; j<2; j++){
                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
                }
            }else{
                for(j=0; j<2; j++){
                    h->chroma_weight[list][i][j]= 1<<h->chroma_log2_weight_denom;
                    h->chroma_offset[list][i][j]= 0;
                }
            }
        }
        if(h->slice_type != B_TYPE) break;
    }
    return 0;
}
2536

    
2537
/**
2538
 * instantaneos decoder refresh.
2539
 */
2540
static void idr(H264Context *h){
2541
    int i;
2542

    
2543
    for(i=0; i<h->long_ref_count; i++){
2544
        h->long_ref[i]->reference=0;
2545
        h->long_ref[i]= NULL;
2546
    }
2547
    h->long_ref_count=0;
2548

    
2549
    for(i=0; i<h->short_ref_count; i++){
2550
        h->short_ref[i]->reference=0;
2551
        h->short_ref[i]= NULL;
2552
    }
2553
    h->short_ref_count=0;
2554
}
2555

    
2556
/**
2557
 *
2558
 * @return the removed picture or NULL if an error occures
2559
 */
2560
static Picture * remove_short(H264Context *h, int frame_num){
2561
    MpegEncContext * const s = &h->s;
2562
    int i;
2563
    
2564
    if(s->avctx->debug&FF_DEBUG_MMCO)
2565
        printf("remove short %d count %d\n", frame_num, h->short_ref_count);
2566
    
2567
    for(i=0; i<h->short_ref_count; i++){
2568
        Picture *pic= h->short_ref[i];
2569
        if(s->avctx->debug&FF_DEBUG_MMCO)
2570
            printf("%d %d %p\n", i, pic->frame_num, pic);
2571
        if(pic->frame_num == frame_num){
2572
            h->short_ref[i]= NULL;
2573
            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
2574
            h->short_ref_count--;
2575
            return pic;
2576
        }
2577
    }
2578
    return NULL;
2579
}
2580

    
2581
/**
2582
 *
2583
 * @return the removed picture or NULL if an error occures
2584
 */
2585
static Picture * remove_long(H264Context *h, int i){
2586
    Picture *pic;
2587

    
2588
    if(i >= h->long_ref_count) return NULL;
2589
    pic= h->long_ref[i];
2590
    if(pic==NULL) return NULL;
2591
    
2592
    h->long_ref[i]= NULL;
2593
    memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*));
2594
    h->long_ref_count--;
2595

    
2596
    return pic;
2597
}
2598

    
2599
/**
2600
 * Executes the reference picture marking (memory management control operations).
2601
 */
2602
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
2603
    MpegEncContext * const s = &h->s;
2604
    int i;
2605
    int current_is_long=0;
2606
    Picture *pic;
2607
    
2608
    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
2609
        printf("no mmco here\n");
2610
        
2611
    for(i=0; i<mmco_count; i++){
2612
        if(s->avctx->debug&FF_DEBUG_MMCO)
2613
            printf("mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
2614

    
2615
        switch(mmco[i].opcode){
2616
        case MMCO_SHORT2UNUSED:
2617
            pic= remove_short(h, mmco[i].short_frame_num);
2618
            if(pic==NULL) return -1;
2619
            pic->reference= 0;
2620
            break;
2621
        case MMCO_SHORT2LONG:
2622
            pic= remove_long(h, mmco[i].long_index);
2623
            if(pic) pic->reference=0;
2624
            
2625
            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
2626
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2627
            break;
2628
        case MMCO_LONG2UNUSED:
2629
            pic= remove_long(h, mmco[i].long_index);
2630
            if(pic==NULL) return -1;
2631
            pic->reference= 0;
2632
            break;
2633
        case MMCO_LONG:
2634
            pic= remove_long(h, mmco[i].long_index);
2635
            if(pic) pic->reference=0;
2636
            
2637
            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
2638
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2639
            h->long_ref_count++;
2640
            
2641
            current_is_long=1;
2642
            break;
2643
        case MMCO_SET_MAX_LONG:
2644
            assert(mmco[i].long_index <= 16);
2645
            while(mmco[i].long_index < h->long_ref_count){
2646
                pic= remove_long(h, mmco[i].long_index);
2647
                pic->reference=0;
2648
            }
2649
            while(mmco[i].long_index > h->long_ref_count){
2650
                h->long_ref[ h->long_ref_count++ ]= NULL;
2651
            }
2652
            break;
2653
        case MMCO_RESET:
2654
            while(h->short_ref_count){
2655
                pic= remove_short(h, h->short_ref[0]->frame_num);
2656
                pic->reference=0;
2657
            }
2658
            while(h->long_ref_count){
2659
                pic= remove_long(h, h->long_ref_count-1);
2660
                pic->reference=0;
2661
            }
2662
            break;
2663
        default: assert(0);
2664
        }
2665
    }
2666
    
2667
    if(!current_is_long){
2668
        pic= remove_short(h, s->current_picture_ptr->frame_num);
2669
        if(pic){
2670
            pic->reference=0;
2671
            fprintf(stderr, "illegal short term buffer state detected\n");
2672
        }
2673
        
2674
        if(h->short_ref_count)
2675
            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
2676

    
2677
        h->short_ref[0]= s->current_picture_ptr;
2678
        h->short_ref[0]->long_ref=0;
2679
        h->short_ref_count++;
2680
    }
2681
    
2682
    return 0; 
2683
}
2684

    
2685
/**
 * Parses the reference picture marking part of a slice header and stores the
 * resulting memory management control operations in h->mmco[] / h->mmco_index.
 * - IDR slices: at most one MMCO_LONG operation for the current picture.
 * - adaptive mode: the operations are read from the bitstream until the
 *   terminating operation 0 or until MAX_MMCO_COUNT operations were read.
 * - sliding window mode: one MMCO_SHORT2UNUSED for the last short term
 *   reference once long + short references reach sps.ref_frame_count.
 * @return 0 on success, -1 if an illegal operation is found
 */
static int decode_ref_pic_marking(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    
    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
        s->broken_link= get_bits1(&s->gb) -1;
        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
        if(h->mmco[0].long_index == -1)
            h->mmco_index= 0;
        else{
            h->mmco[0].opcode= MMCO_LONG;
            h->mmco_index= 1;
        } 
    }else{
        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
            // always fill the table from its start, h->mmco_index still holds
            // the count of the previously parsed header at this point
            for(i= 0; i<MAX_MMCO_COUNT; i++) { 
                MMCOOpcode opcode= get_ue_golomb(&s->gb);

                // memory_management_control_operation 0 terminates the list
                if(opcode == 0)
                    break;

                h->mmco[i].opcode= opcode;
                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
                }
                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
                    if(h->mmco[i].long_index >= 16){
                        fprintf(stderr, "illegal long ref in memory management control operation %d\n", opcode);
                        return -1;
                    }
                }
                    
                if(opcode > MMCO_LONG){
                    fprintf(stderr, "illegal memory management control operation %d\n", opcode);
                    return -1;
                }
            }
            h->mmco_index= i;
        }else{
            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);

            // only a short term picture can slide out of the window, so
            // nothing is removed while the short term list is empty
            if(h->short_ref_count > 0 && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
                h->mmco_index= 1;
            }else
                h->mmco_index= 0;
        }
    }
    
    return 0; 
}
2739

    
2740
/**
 * Computes the picture order count of the current picture.
 * Updates h->frame_num_offset (and h->poc_msb for poc_type 0), derives the
 * top and bottom field order counts according to sps.poc_type and stores
 * them in the current picture; for frame pictures the smaller of the two
 * becomes the poc of the picture.
 * @return always 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int frame_num_period= 1<<h->sps.log2_max_frame_num;
    int top_poc, bottom_poc;

    // frame_num wrapped around since the previous picture -> advance the offset
    if(h->nal_unit_type == NAL_IDR_SLICE)
        h->frame_num_offset= 0;
    else if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset= h->prev_frame_num_offset + frame_num_period;
    else
        h->frame_num_offset= h->prev_frame_num_offset;

    switch(h->sps.poc_type){
    case 0:{
        const int poc_lsb_period= 1<<h->sps.log2_max_poc_lsb;
        int msb_step= 0;

        // a jump of half the lsb range or more is treated as a wrap of the lsb
        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= poc_lsb_period/2)
            msb_step=  poc_lsb_period;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -poc_lsb_period/2)
            msb_step= -poc_lsb_period;
        h->poc_msb = h->prev_poc_msb + msb_step;

        top_poc= bottom_poc= h->poc_msb + h->poc_lsb;
        if(s->picture_structure == PICT_FRAME) 
            bottom_poc += h->delta_poc_bottom;
        break;
    }
    case 1:{
        int abs_frame_num= 0;
        int cycle_delta= 0;
        int expected_poc= 0;
        int k;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;

        // sum of the offsets of one complete poc cycle
        for(k=0; k < h->sps.poc_cycle_length; k++)
            cycle_delta += h->sps.offset_for_ref_frame[ k ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            const int full_cycles   = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            const int last_in_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expected_poc = full_cycles * cycle_delta;
            for(k = 0; k <= last_in_cycle; k++)
                expected_poc += h->sps.offset_for_ref_frame[ k ];
        }

        if(h->nal_ref_idc == 0) 
            expected_poc += h->sps.offset_for_non_ref_pic;

        top_poc    = expected_poc + h->delta_poc[0];
        bottom_poc = top_poc + h->sps.offset_for_top_to_bottom_field;

        if(s->picture_structure == PICT_FRAME)
            bottom_poc += h->delta_poc[1];
        break;
    }
    default:{
        // poc follows directly from the frame number, non reference pictures get the odd values
        int poc= 0;

        if(h->nal_unit_type != NAL_IDR_SLICE){
            poc= 2*(h->frame_num_offset + h->frame_num);
            if(!h->nal_ref_idc)
                poc--;
        }
        top_poc= bottom_poc= poc;
        break;
    }
    }

    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= top_poc;
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= bottom_poc;
    if(s->picture_structure == PICT_FRAME) // FIXME field pix?
        s->current_picture_ptr->poc= FFMIN(top_poc, bottom_poc);

    return 0;
}
2823

    
2824
/**
2825
 * decodes a slice header.
2826
 * this will allso call MPV_common_init() and frame_start() as needed
2827
 */
2828
static int decode_slice_header(H264Context *h){
2829
    MpegEncContext * const s = &h->s;
2830
    int first_mb_in_slice, pps_id;
2831
    int num_ref_idx_active_override_flag;
2832
    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
2833
    float new_aspect;
2834

    
2835
    s->current_picture.reference= h->nal_ref_idc != 0;
2836

    
2837
    first_mb_in_slice= get_ue_golomb(&s->gb);
2838

    
2839
    h->slice_type= get_ue_golomb(&s->gb);
2840
    if(h->slice_type > 9){
2841
        fprintf(stderr, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
2842
    }
2843
    if(h->slice_type > 4){
2844
        h->slice_type -= 5;
2845
        h->slice_type_fixed=1;
2846
    }else
2847
        h->slice_type_fixed=0;
2848
    
2849
    h->slice_type= slice_type_map[ h->slice_type ];
2850
    
2851
    s->pict_type= h->slice_type; // to make a few old func happy, its wrong though
2852
        
2853
    pps_id= get_ue_golomb(&s->gb);
2854
    if(pps_id>255){
2855
        fprintf(stderr, "pps_id out of range\n");
2856
        return -1;
2857
    }
2858
    h->pps= h->pps_buffer[pps_id];
2859
    if(h->pps.slice_group_count == 0){
2860
        fprintf(stderr, "non existing PPS referenced\n");
2861
        return -1;
2862
    }
2863

    
2864
    h->sps= h->sps_buffer[ h->pps.sps_id ];
2865
    if(h->sps.log2_max_frame_num == 0){
2866
        fprintf(stderr, "non existing SPS referenced\n");
2867
        return -1;
2868
    }
2869
    
2870
    s->mb_width= h->sps.mb_width;
2871
    s->mb_height= h->sps.mb_height;
2872
    
2873
    h->b_stride=  s->mb_width*4;
2874
    h->b8_stride= s->mb_width*2;
2875

    
2876
    s->mb_x = first_mb_in_slice % s->mb_width;
2877
    s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
2878
    
2879
    s->width = 16*s->mb_width - 2*(h->pps.crop_left + h->pps.crop_right );
2880
    if(h->sps.frame_mbs_only_flag)
2881
        s->height= 16*s->mb_height - 2*(h->pps.crop_top  + h->pps.crop_bottom);
2882
    else
2883
        s->height= 16*s->mb_height - 4*(h->pps.crop_top  + h->pps.crop_bottom); //FIXME recheck
2884
    
2885
    if(h->pps.crop_left || h->pps.crop_top){
2886
        fprintf(stderr, "insane croping not completly supported, this could look slightly wrong ...\n");
2887
    }
2888

    
2889
    if(s->aspected_height) //FIXME emms at end of slice ?
2890
        new_aspect= h->sps.sar_width*s->width / (float)(s->height*h->sps.sar_height);
2891
    else
2892
        new_aspect=0;
2893

    
2894
    if (s->context_initialized 
2895
        && (   s->width != s->avctx->width || s->height != s->avctx->height 
2896
            || ABS(new_aspect - s->avctx->aspect_ratio) > 0.001)) {
2897
        free_tables(h);
2898
        MPV_common_end(s);
2899
    }
2900
    if (!s->context_initialized) {
2901
        if (MPV_common_init(s) < 0)
2902
            return -1;
2903

    
2904
        alloc_tables(h);
2905

    
2906
        s->avctx->width = s->width;
2907
        s->avctx->height = s->height;
2908
        s->avctx->aspect_ratio= new_aspect;
2909
    }
2910

    
2911
    if(first_mb_in_slice == 0){
2912
        frame_start(h);
2913
    }
2914

    
2915
    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
2916
    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
2917

    
2918
    if(h->sps.frame_mbs_only_flag){
2919
        s->picture_structure= PICT_FRAME;
2920
    }else{
2921
        if(get_bits1(&s->gb)) //field_pic_flag
2922
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
2923
        else
2924
            s->picture_structure= PICT_FRAME;
2925
    }
2926

    
2927
    if(s->picture_structure==PICT_FRAME){
2928
        h->curr_pic_num=   h->frame_num;
2929
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
2930
    }else{
2931
        h->curr_pic_num= 2*h->frame_num;
2932
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
2933
    }
2934
        
2935
    if(h->nal_unit_type == NAL_IDR_SLICE){
2936
        int idr_pic_id= get_ue_golomb(&s->gb);
2937
    }
2938
   
2939
    if(h->sps.poc_type==0){
2940
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
2941
        
2942
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
2943
            h->delta_poc_bottom= get_se_golomb(&s->gb);
2944
        }
2945
    }
2946
    
2947
    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
2948
        h->delta_poc[0]= get_se_golomb(&s->gb);
2949
        
2950
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
2951
            h->delta_poc[1]= get_se_golomb(&s->gb);
2952
    }
2953
    
2954
    init_poc(h);
2955
    
2956
    if(h->pps.redundant_pic_cnt_present){
2957
        h->redundant_pic_count= get_ue_golomb(&s->gb);
2958
    }
2959

    
2960
    //set defaults, might be overriden a few line later
2961
    h->ref_count[0]= h->pps.ref_count[0];
2962
    h->ref_count[1]= h->pps.ref_count[1];
2963

    
2964
    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
2965
        if(h->slice_type == B_TYPE){
2966
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
2967
        }
2968
        num_ref_idx_active_override_flag= get_bits1(&s->gb);
2969
    
2970
        if(num_ref_idx_active_override_flag){
2971
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
2972
            if(h->slice_type==B_TYPE)
2973
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2974

    
2975
            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
2976
                fprintf(stderr, "reference overflow\n");
2977
                return -1;
2978
            }
2979
        }
2980
    }
2981

    
2982
    if(first_mb_in_slice == 0){
2983
        fill_default_ref_list(h);
2984
    }
2985

    
2986
    decode_ref_pic_list_reordering(h);
2987

    
2988
    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) 
2989
       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
2990
        pred_weight_table(h);
2991
    
2992
    if(s->current_picture.reference)
2993
        decode_ref_pic_marking(h);
2994
    //FIXME CABAC stuff
2995

    
2996
    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); //slice_qp_delta
2997
    //FIXME qscale / qp ... stuff
2998
    if(h->slice_type == SP_TYPE){
2999
        int sp_for_switch_flag= get_bits1(&s->gb);
3000
    }
3001
    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
3002
        int slice_qs_delta= get_se_golomb(&s->gb);
3003
    }
3004

    
3005
    if( h->pps.deblocking_filter_parameters_present ) {
3006
        h->disable_deblocking_filter_idc= get_ue_golomb(&s->gb);
3007
        if( h->disable_deblocking_filter_idc  !=  1 ) {
3008
            h->slice_alpha_c0_offset_div2= get_se_golomb(&s->gb);
3009
            h->slice_beta_offset_div2= get_se_golomb(&s->gb);
3010
        }
3011
    }else
3012
        h->disable_deblocking_filter_idc= 0;
3013

    
3014
#if 0 //FMO
3015
    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3016
        slice_group_change_cycle= get_bits(&s->gb, ?);
3017
#endif
3018

    
3019
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3020
        printf("mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
3021
               first_mb_in_slice, 
3022
               av_get_pict_type_char(h->slice_type),
3023
               pps_id, h->frame_num,
3024
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3025
               h->ref_count[0], h->ref_count[1],
3026
               s->qscale,
3027
               h->disable_deblocking_filter_idc
3028
               );
3029
    }
3030

    
3031
    return 0;
3032
}
3033

    
3034
/**
3035
 *
3036
 */
3037
static inline int get_level_prefix(GetBitContext *gb){
3038
    unsigned int buf;
3039
    int log;
3040
    
3041
    OPEN_READER(re, gb);
3042
    UPDATE_CACHE(re, gb);
3043
    buf=GET_CACHE(re, gb);
3044
    
3045
    log= 32 - av_log2(buf);
3046
#ifdef TRACE
3047
    print_bin(buf>>(32-log), log);
3048
    printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
3049
#endif
3050

    
3051
    LAST_SKIP_BITS(re, gb, log);
3052
    CLOSE_READER(re, gb);
3053

    
3054
    return log-1;
3055
}
3056

    
3057
/**
3058
 * decodes a residual block.
3059
 * @param n block index
3060
 * @param scantable scantable
3061
 * @param max_coeff number of coefficients in the block
3062
 * @return <0 if an error occured
3063
 */
3064
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){
3065
    MpegEncContext * const s = &h->s;
3066
    const uint16_t *qmul= dequant_coeff[qp];
3067
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3068
    int level[16], run[16];
3069
    int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
3070

    
3071
    //FIXME put trailing_onex into the context
3072

    
3073
    if(n == CHROMA_DC_BLOCK_INDEX){
3074
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3075
        total_coeff= coeff_token>>2;
3076
    }else{    
3077
        if(n == LUMA_DC_BLOCK_INDEX){
3078
            total_coeff= pred_non_zero_count(h, 0);
3079
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3080
            total_coeff= coeff_token>>2;
3081
        }else{
3082
            total_coeff= pred_non_zero_count(h, n);
3083
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3084
            total_coeff= coeff_token>>2;
3085
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
3086
        }
3087
    }
3088

    
3089
    //FIXME set last_non_zero?
3090

    
3091
    if(total_coeff==0)
3092
        return 0;
3093
        
3094
    trailing_ones= coeff_token&3;
3095
    tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
3096
    assert(total_coeff<=16);
3097
    
3098
    for(i=0; i<trailing_ones; i++){
3099
        level[i]= 1 - 2*get_bits1(gb);
3100
    }
3101

    
3102
    suffix_length= total_coeff > 10 && trailing_ones < 3;
3103

    
3104
    for(; i<total_coeff; i++){
3105
        const int prefix= get_level_prefix(gb);
3106
        int level_code, mask;
3107

    
3108
        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
3109
            if(suffix_length)
3110
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3111
            else
3112
                level_code= (prefix<<suffix_length); //part
3113
        }else if(prefix==14){
3114
            if(suffix_length)
3115
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3116
            else
3117
                level_code= prefix + get_bits(gb, 4); //part
3118
        }else if(prefix==15){
3119
            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
3120
            if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
3121
        }else{
3122
            fprintf(stderr, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
3123
            return -1;
3124
        }
3125

    
3126
        if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
3127

    
3128
        mask= -(level_code&1);
3129
        level[i]= (((2+level_code)>>1) ^ mask) - mask;
3130

    
3131
        if(suffix_length==0) suffix_length=1; //FIXME split first iteration
3132

    
3133
#if 1
3134
        if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3135
#else        
3136
        if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3137
        ? == prefix > 2 or sth
3138
#endif
3139
        tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
3140
    }
3141

    
3142
    if(total_coeff == max_coeff)
3143
        zeros_left=0;
3144
    else{
3145
        if(n == CHROMA_DC_BLOCK_INDEX)
3146
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
3147
        else
3148
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
3149
    }
3150
    
3151
    for(i=0; i<total_coeff-1; i++){
3152
        if(zeros_left <=0)
3153
            break;
3154
        else if(zeros_left < 7){
3155
            run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
3156
        }else{
3157
            run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
3158
        }
3159
        zeros_left -= run[i];
3160
    }
3161

    
3162
    if(zeros_left<0){
3163
        fprintf(stderr, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
3164
        return -1;
3165
    }
3166
    
3167
    for(; i<total_coeff-1; i++){
3168
        run[i]= 0;
3169
    }
3170

    
3171
    run[i]= zeros_left;
3172

    
3173
    coeff_num=-1;
3174
    if(n > 24){
3175
        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
3176
            int j;
3177

    
3178
            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
3179
            j= scantable[ coeff_num ];
3180

    
3181
            block[j]= level[i];
3182
        }
3183
    }else{
3184
        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into  rundecode?
3185
            int j;
3186

    
3187
            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
3188
            j= scantable[ coeff_num ];
3189

    
3190
            block[j]= level[i] * qmul[j];
3191
//            printf("%d %d  ", block[j], qmul[j]);
3192
        }
3193
    }
3194
    return 0;
3195
}
3196

    
3197
/**
3198
 * decodes a macroblock
3199
 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
3200
 */
3201
static int decode_mb(H264Context *h){
3202
    MpegEncContext * const s = &h->s;
3203
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3204
    int mb_type, partition_count, cbp;
3205

    
3206
    s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong?    
3207

    
3208
    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
3209

    
3210
    if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
3211
        if(s->mb_skip_run==-1)
3212
            s->mb_skip_run= get_ue_golomb(&s->gb);
3213
        
3214
        if (s->mb_skip_run--) {
3215
            int mx, my;
3216
            /* skip mb */
3217
//FIXME b frame
3218
            mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0;
3219

    
3220
            memset(h->non_zero_count[mb_xy], 0, 16);
3221
            memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
3222

    
3223
            if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){
3224
                h->mb_field_decoding_flag= get_bits1(&s->gb);
3225
            }
3226

    
3227
            if(h->mb_field_decoding_flag)
3228
                mb_type|= MB_TYPE_INTERLACED;
3229
            
3230
            fill_caches(h, mb_type); //FIXME check what is needed and what not ...
3231
            pred_pskip_motion(h, &mx, &my);
3232
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
3233
            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
3234
            write_back_motion(h, mb_type);
3235

    
3236
            s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type
3237
            h->slice_table[ mb_xy ]= h->slice_num;
3238

    
3239
            h->prev_mb_skiped= 1;
3240
            return 0;
3241
        }
3242
    }
3243
    if(h->sps.mb_aff /* && !field pic FIXME needed? */){
3244
        if((s->mb_y&1)==0)
3245
            h->mb_field_decoding_flag = get_bits1(&s->gb);
3246
    }else
3247
        h->mb_field_decoding_flag=0; //FIXME som ed note ?!
3248
    
3249
    h->prev_mb_skiped= 0;
3250
    
3251
    mb_type= get_ue_golomb(&s->gb);
3252
    if(h->slice_type == B_TYPE){
3253
        if(mb_type < 23){
3254
            partition_count= b_mb_type_info[mb_type].partition_count;
3255
            mb_type=         b_mb_type_info[mb_type].type;
3256
        }else{
3257
            mb_type -= 23;
3258
            goto decode_intra_mb;
3259
        }
3260
    }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
3261
        if(mb_type < 5){
3262
            partition_count= p_mb_type_info[mb_type].partition_count;
3263
            mb_type=         p_mb_type_info[mb_type].type;
3264
        }else{
3265
            mb_type -= 5;
3266
            goto decode_intra_mb;
3267
        }
3268
    }else{
3269
       assert(h->slice_type == I_TYPE);
3270
decode_intra_mb:
3271
        if(mb_type > 25){
3272
            fprintf(stderr, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
3273
            return -1;
3274
        }
3275
        partition_count=0;
3276
        cbp= i_mb_type_info[mb_type].cbp;
3277
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
3278
        mb_type= i_mb_type_info[mb_type].type;
3279
    }
3280

    
3281
    if(h->mb_field_decoding_flag)
3282
        mb_type |= MB_TYPE_INTERLACED;
3283

    
3284
    s->current_picture.mb_type[mb_xy]= mb_type;
3285
    h->slice_table[ mb_xy ]= h->slice_num;
3286
    
3287
    if(IS_INTRA_PCM(mb_type)){
3288
        const uint8_t *ptr;
3289
        int x, y;
3290
        
3291
        // we assume these blocks are very rare so we dont optimize it
3292
        align_get_bits(&s->gb);
3293
        
3294
        ptr= s->gb.buffer + get_bits_count(&s->gb);
3295
    
3296
        for(y=0; y<16; y++){
3297
            const int index= 4*(y&3) + 64*(y>>2);
3298
            for(x=0; x<16; x++){
3299
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3300
            }
3301
        }
3302
        for(y=0; y<8; y++){
3303
            const int index= 256 + 4*(y&3) + 32*(y>>2);
3304
            for(x=0; x<8; x++){
3305
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3306
            }
3307
        }
3308
        for(y=0; y<8; y++){
3309
            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
3310
            for(x=0; x<8; x++){
3311
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3312
            }
3313
        }
3314
    
3315
        skip_bits(&s->gb, 384); //FIXME check /fix the bitstream readers
3316
        
3317
        memset(h->non_zero_count[mb_xy], 16, 16);
3318
        
3319
        return 0;
3320
    }
3321
        
3322
    fill_caches(h, mb_type);
3323

    
3324
    //mb_pred
3325
    if(IS_INTRA(mb_type)){
3326
//            init_top_left_availability(h);
3327
            if(IS_INTRA4x4(mb_type)){
3328
                int i;
3329

    
3330
//                fill_intra4x4_pred_table(h);
3331
                for(i=0; i<16; i++){
3332
                    const int mode_coded= !get_bits1(&s->gb);
3333
                    const int predicted_mode=  pred_intra_mode(h, i);
3334
                    int mode;
3335

    
3336
                    if(mode_coded){
3337
                        const int rem_mode= get_bits(&s->gb, 3);
3338
                        if(rem_mode<predicted_mode)
3339
                            mode= rem_mode;
3340
                        else
3341
                            mode= rem_mode + 1;
3342
                    }else{
3343
                        mode= predicted_mode;
3344
                    }
3345
                    
3346
                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
3347
                }
3348
                write_back_intra_pred_mode(h);
3349
                if( check_intra4x4_pred_mode(h) < 0)
3350
                    return -1;
3351
            }else{
3352
                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
3353
                if(h->intra16x16_pred_mode < 0)
3354
                    return -1;
3355
            }
3356
            h->chroma_pred_mode= get_ue_golomb(&s->gb);
3357

    
3358
            h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
3359
            if(h->chroma_pred_mode < 0)
3360
                return -1;
3361
    }else if(partition_count==4){
3362
        int i, j, sub_partition_count[4], list, ref[2][4];
3363
        
3364
        if(h->slice_type == B_TYPE){
3365
            for(i=0; i<4; i++){
3366
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
3367
                if(h->sub_mb_type[i] >=13){
3368
                    fprintf(stderr, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
3369
                    return -1;
3370
                }
3371
                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
3372
                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
3373
            }
3374
        }else{
3375
            assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
3376
            for(i=0; i<4; i++){
3377
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
3378
                if(h->sub_mb_type[i] >=4){
3379
                    fprintf(stderr, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
3380
                    return -1;
3381
                }
3382
                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
3383
                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
3384
            }
3385
        }
3386
        
3387
        for(list=0; list<2; list++){
3388
            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
3389
            if(ref_count == 0) continue;
3390
            for(i=0; i<4; i++){
3391
                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
3392
                    ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
3393
                }else{
3394
                 //FIXME
3395
                    ref[list][i] = -1;
3396
                }
3397
            }
3398
        }
3399
        
3400
        for(list=0; list<2; list++){
3401
            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
3402
            if(ref_count == 0) continue;
3403

    
3404
            for(i=0; i<4; i++){
3405
                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
3406
                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
3407

    
3408
                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
3409
                    const int sub_mb_type= h->sub_mb_type[i];
3410
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
3411
                    for(j=0; j<sub_partition_count[i]; j++){
3412
                        int mx, my;
3413
                        const int index= 4*i + block_width*j;
3414
                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
3415
                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
3416
                        mx += get_se_golomb(&s->gb);
3417
                        my += get_se_golomb(&s->gb);
3418
                        tprintf("final mv:%d %d\n", mx, my);
3419

    
3420
                        if(IS_SUB_8X8(sub_mb_type)){
3421
                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= 
3422
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
3423
                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= 
3424
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
3425
                        }else if(IS_SUB_8X4(sub_mb_type)){
3426
                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
3427
                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
3428
                        }else if(IS_SUB_4X8(sub_mb_type)){
3429
                            mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
3430
                            mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
3431
                        }else{
3432
                            assert(IS_SUB_4X4(sub_mb_type));
3433
                            mv_cache[ 0 ][0]= mx;
3434
                            mv_cache[ 0 ][1]= my;
3435
                        }
3436
                    }
3437
                }else{
3438
                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
3439
                    p[0] = p[1]=
3440
                    p[8] = p[9]= 0;
3441
                }
3442
            }
3443
        }
3444
    }else if(!IS_DIRECT(mb_type)){
3445
        int list, mx, my, i;
3446
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
3447
        if(IS_16X16(mb_type)){
3448
            for(list=0; list<2; list++){
3449
                if(h->ref_count[0]>0){
3450
                    if(IS_DIR(mb_type, 0, list)){
3451
                        const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3452
                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
3453
                    }
3454
                }
3455
            }
3456
            for(list=0; list<2; list++){
3457
                if(IS_DIR(mb_type, 0, list)){
3458
                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
3459
                    mx += get_se_golomb(&s->gb);
3460
                    my += get_se_golomb(&s->gb);
3461
                    tprintf("final mv:%d %d\n", mx, my);
3462

    
3463
                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
3464
                }
3465
            }
3466
        }
3467
        else if(IS_16X8(mb_type)){
3468
            for(list=0; list<2; list++){
3469
                if(h->ref_count[list]>0){
3470
                    for(i=0; i<2; i++){
3471
                        if(IS_DIR(mb_type, i, list)){
3472
                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3473
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
3474
                        }
3475
                    }
3476
                }
3477
            }
3478
            for(list=0; list<2; list++){
3479
                for(i=0; i<2; i++){
3480
                    if(IS_DIR(mb_type, i, list)){
3481
                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
3482
                        mx += get_se_golomb(&s->gb);
3483
                        my += get_se_golomb(&s->gb);
3484
                        tprintf("final mv:%d %d\n", mx, my);
3485

    
3486
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
3487
                    }
3488
                }
3489
            }
3490
        }else{
3491
            assert(IS_8X16(mb_type));
3492
            for(list=0; list<2; list++){
3493
                if(h->ref_count[list]>0){
3494
                    for(i=0; i<2; i++){
3495
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
3496
                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3497
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
3498
                        }
3499
                    }
3500
                }
3501
            }
3502
            for(list=0; list<2; list++){
3503
                for(i=0; i<2; i++){
3504
                    if(IS_DIR(mb_type, i, list)){
3505
                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
3506
                        mx += get_se_golomb(&s->gb);
3507
                        my += get_se_golomb(&s->gb);
3508
                        tprintf("final mv:%d %d\n", mx, my);
3509

    
3510
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
3511
                    }
3512
                }
3513
            }
3514
        }
3515
    }
3516
    
3517
    if(IS_INTER(mb_type))
3518
        write_back_motion(h, mb_type);
3519
    
3520
    if(!IS_INTRA16x16(mb_type)){
3521
        cbp= get_ue_golomb(&s->gb);
3522
        if(cbp > 47){
3523
            fprintf(stderr, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
3524
            return -1;
3525
        }
3526
        
3527
        if(IS_INTRA4x4(mb_type))
3528
            cbp= golomb_to_intra4x4_cbp[cbp];
3529
        else
3530
            cbp= golomb_to_inter_cbp[cbp];
3531
    }
3532

    
3533
    if(cbp || IS_INTRA16x16(mb_type)){
3534
        int i8x8, i4x4, chroma_idx;
3535
        int chroma_qp, dquant;
3536
        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
3537
        const uint8_t *scan, *dc_scan;
3538
        
3539
//        fill_non_zero_count_cache(h);
3540

    
3541
        if(IS_INTERLACED(mb_type)){
3542
            scan= field_scan;
3543
            dc_scan= luma_dc_field_scan;
3544
        }else{
3545
            scan= zigzag_scan;
3546
            dc_scan= luma_dc_zigzag_scan;
3547
        }
3548

    
3549
        dquant= get_se_golomb(&s->gb);
3550

    
3551
        if( dquant > 25 || dquant < -26 ){
3552
            fprintf(stderr, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
3553
            return -1;
3554
        }
3555
        
3556
        s->qscale += dquant;
3557
        if(((unsigned)s->qscale) > 51){
3558
            if(s->qscale<0) s->qscale+= 52;
3559
            else            s->qscale-= 52;
3560
        }
3561
        
3562
        h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale);
3563
        if(IS_INTRA16x16(mb_type)){
3564
            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){
3565
                return -1; //FIXME continue if partotioned and other retirn -1 too
3566
            }
3567

    
3568
            assert((cbp&15) == 0 || (cbp&15) == 15);
3569

    
3570
            if(cbp&15){
3571
                for(i8x8=0; i8x8<4; i8x8++){
3572
                    for(i4x4=0; i4x4<4; i4x4++){
3573
                        const int index= i4x4 + 4*i8x8;
3574
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, s->qscale, 15) < 0 ){
3575
                            return -1;
3576
                        }
3577
                    }
3578
                }
3579
            }else{
3580
                memset(&h->non_zero_count_cache[8], 0, 8*4); //FIXME stupid & slow
3581
            }
3582
        }else{
3583
            for(i8x8=0; i8x8<4; i8x8++){
3584
                if(cbp & (1<<i8x8)){
3585
                    for(i4x4=0; i4x4<4; i4x4++){
3586
                        const int index= i4x4 + 4*i8x8;
3587
                        
3588
                        if( decode_residual(h, gb, h->mb + 16*index, index, scan, s->qscale, 16) <0 ){
3589
                            return -1;
3590
                        }
3591
                    }
3592
                }else{
3593
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
3594
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
3595
                }
3596
            }
3597
        }
3598
        
3599
        if(cbp&0x30){
3600
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
3601
                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, chroma_qp, 4) < 0){
3602
                    return -1;
3603
                }
3604
        }
3605

    
3606
        if(cbp&0x20){
3607
            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
3608
                for(i4x4=0; i4x4<4; i4x4++){
3609
                    const int index= 16 + 4*chroma_idx + i4x4;
3610
                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, chroma_qp, 15) < 0){
3611
                        return -1;
3612
                    }
3613
                }
3614
            }
3615
        }else{
3616
            uint8_t * const nnz= &h->non_zero_count_cache[0];
3617
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
3618
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
3619
        }
3620
    }else{
3621
        memset(&h->non_zero_count_cache[8], 0, 8*5);
3622
    }
3623
    write_back_non_zero_count(h);
3624

    
3625
    return 0;
3626
}
3627

    
3628
/**
 * decodes the macroblocks of a slice
 *
 * Macroblocks are parsed with decode_mb() and reconstructed with
 * hl_decode_mb() until the picture is complete or the bitstream of the slice
 * is used up. The decoded range is reported through ff_er_add_slice().
 *
 * @returns 0 if the slice ended exactly at the end of the bitstream,
 *          -1 if a macroblock could not be decoded or the end did not match
 */
static int decode_slice(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;

    s->mb_skip_run= -1; // no skip run pending, decode_mb() will read one

    for(;;){
        int ret= decode_mb(h);

        /* only reconstruct macroblocks which were parsed successfully, after
           an error the caches of the context are only partially filled */
        if(ret>=0) hl_decode_mb(h);

        if(ret>=0 && h->sps.mb_aff){ //FIXME optimal? or let mb_decode decode 16x32 ?
            s->mb_y++;
            ret= decode_mb(h);

            if(ret>=0) hl_decode_mb(h);
            s->mb_y--;
        }

        if(ret<0){
            fprintf(stderr, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
            ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

            return -1;
        }

        if(++s->mb_x >= s->mb_width){
            s->mb_x=0;
            ff_draw_horiz_band(s, 16*s->mb_y, 16);
            if(++s->mb_y >= s->mb_height){
                tprintf("slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);

                if(get_bits_count(&s->gb) == s->gb.size_in_bits){
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return 0;
                }else{
                    ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                    return -1;
                }
            }
        }

        /* all bits are consumed and no skipped macroblocks are left */
        if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
            if(get_bits_count(&s->gb) == s->gb.size_in_bits){
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);

                return 0;
            }else{
                ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);

                return -1;
            }
        }
    }

    return -1; //not reached
}
3733

    
3734
/**
 * decodes the VUI parameters of a sequence parameter set
 *
 * Only the sample aspect ratio is parsed, it is stored in sps->sar_width and
 * sps->sar_height (both 0 if no aspect ratio is coded).
 *
 * @returns 0 if ok, -1 if the aspect ratio index is illegal
 */
static inline int decode_vui_parameters(H264Context *h, SPS *sps){
    GetBitContext * const gb= &h->s.gb;

    if( !get_bits1(gb) ){
        /* aspect_ratio_info_present_flag not set */
        sps->sar_height= 0;
        sps->sar_width = 0;
    }else{
        const int idc= get_bits(gb, 8);

        if( idc == EXTENDED_SAR ){
            /* explicitly coded width and height */
            sps->sar_width = get_bits(gb, 16);
            sps->sar_height= get_bits(gb, 16);
        }else if( idc >= 16 ){
            fprintf(stderr, "illegal aspect ratio\n");
            return -1;
        }else{
            /* predefined ratio from the table */
            sps->sar_width = pixel_aspect[idc][0];
            sps->sar_height= pixel_aspect[idc][1];
        }
    }
    /* FIXME s->avctx->aspect_ratio is not set from the sample aspect ratio here */

    /* Remaining VUI syntax elements, not parsed yet:
| overscan_info_present_flag                        |0  |u(1)    |
| if( overscan_info_present_flag )                  |   |        |
|  overscan_appropriate_flag                        |0  |u(1)    |
| video_signal_type_present_flag                    |0  |u(1)    |
| if( video_signal_type_present_flag ) {            |   |        |
|  video_format                                     |0  |u(3)    |
|  video_full_range_flag                            |0  |u(1)    |
|  colour_description_present_flag                  |0  |u(1)    |
|  if( colour_description_present_flag ) {          |   |        |
|   colour_primaries                                |0  |u(8)    |
|   transfer_characteristics                        |0  |u(8)    |
|   matrix_coefficients                             |0  |u(8)    |
|  }                                                |   |        |
| }                                                 |   |        |
| chroma_location_info_present_flag                 |0  |u(1)    |
| if ( chroma_location_info_present_flag ) {        |   |        |
|  chroma_sample_location_type_top_field            |0  |ue(v)   |
|  chroma_sample_location_type_bottom_field         |0  |ue(v)   |
| }                                                 |   |        |
| timing_info_present_flag                          |0  |u(1)    |
| if( timing_info_present_flag ) {                  |   |        |
|  num_units_in_tick                                |0  |u(32)   |
|  time_scale                                       |0  |u(32)   |
|  fixed_frame_rate_flag                            |0  |u(1)    |
| }                                                 |   |        |
| nal_hrd_parameters_present_flag                   |0  |u(1)    |
| if( nal_hrd_parameters_present_flag  = =  1)      |   |        |
|  hrd_parameters( )                                |   |        |
| vcl_hrd_parameters_present_flag                   |0  |u(1)    |
| if( vcl_hrd_parameters_present_flag  = =  1)      |   |        |
|  hrd_parameters( )                                |   |        |
| if( ( nal_hrd_parameters_present_flag  = =  1  | ||   |        |
|                                                   |   |        |
|( vcl_hrd_parameters_present_flag  = =  1 ) )      |   |        |
|  low_delay_hrd_flag                               |0  |u(1)    |
| bitstream_restriction_flag                        |0  |u(1)    |
| if( bitstream_restriction_flag ) {                |0  |u(1)    |
|  motion_vectors_over_pic_boundaries_flag          |0  |u(1)    |
|  max_bytes_per_pic_denom                          |0  |ue(v)   |
|  max_bits_per_mb_denom                            |0  |ue(v)   |
|  log2_max_mv_length_horizontal                    |0  |ue(v)   |
|  log2_max_mv_length_vertical                      |0  |ue(v)   |
|  num_reorder_frames                               |0  |ue(v)   |
|  max_dec_frame_buffering                          |0  |ue(v)   |
| }                                                 |   |        |
|}                                                  |   |        |
    */
    return 0;
}
3807

    
3808
/**
 * decodes a sequence parameter set
 *
 * The parsed values are stored in h->sps_buffer[sps_id], where sps_id is read
 * from the bitstream.
 *
 * @returns 0 if ok, -1 if sps_id, the POC cycle length or the POC type is
 *          out of range
 */
static inline int decode_seq_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int profile_idc, level_idc, multiple_slice_groups, arbitrary_slice_order, redundant_slices;
    int sps_id, i;
    SPS *sps;

    profile_idc= get_bits(&s->gb, 8);
    level_idc= get_bits(&s->gb, 8);
    multiple_slice_groups= get_bits1(&s->gb);
    arbitrary_slice_order= get_bits1(&s->gb);
    redundant_slices= get_bits1(&s->gb);

    sps_id= get_ue_golomb(&s->gb);
    /* sps_id comes from the bitstream, it must not index past the buffer
       (assumes sps_buffer holds MAX_SPS_COUNT entries -- TODO confirm) */
    if((unsigned)sps_id >= MAX_SPS_COUNT){
        fprintf(stderr, "sps_id %d out of range\n", sps_id);
        return -1;
    }

    sps= &h->sps_buffer[ sps_id ];

    sps->profile_idc= profile_idc;
    sps->level_idc= level_idc;
    sps->multiple_slice_groups= multiple_slice_groups;
    sps->arbitrary_slice_order= arbitrary_slice_order;
    sps->redundant_slices= redundant_slices;

    sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;

    sps->poc_type= get_ue_golomb(&s->gb);

    if(sps->poc_type == 0){ //FIXME #define
        sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
    } else if(sps->poc_type == 1){//FIXME #define
        sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
        sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
        sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
        sps->poc_cycle_length= get_ue_golomb(&s->gb);

        /* the standard limits the cycle length to 255, larger values would
           write past offset_for_ref_frame[] (assumes the array holds at
           least 255 entries -- TODO confirm against the SPS declaration) */
        if((unsigned)sps->poc_cycle_length > 255){
            fprintf(stderr, "poc_cycle_length %d out of range\n", sps->poc_cycle_length);
            return -1;
        }

        for(i=0; i<sps->poc_cycle_length; i++)
            sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
    }
    if(sps->poc_type > 2){
        fprintf(stderr, "illegal POC type %d\n", sps->poc_type);
        return -1;
    }

    sps->ref_frame_count= get_ue_golomb(&s->gb);
    sps->required_frame_num_update_behaviour_flag= get_bits1(&s->gb);
    sps->mb_width= get_ue_golomb(&s->gb) + 1;
    sps->mb_height= get_ue_golomb(&s->gb) + 1;
    sps->frame_mbs_only_flag= get_bits1(&s->gb);
    if(!sps->frame_mbs_only_flag)
        sps->mb_aff= get_bits1(&s->gb);
    else
        sps->mb_aff= 0;

    sps->direct_8x8_inference_flag= get_bits1(&s->gb);

    sps->vui_parameters_present_flag= get_bits1(&s->gb);
    /* the VUI is optional information, an error in it is deliberately not
       treated as an error of the whole parameter set */
    if( sps->vui_parameters_present_flag )
        decode_vui_parameters(h, sps);

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        printf("sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s %s\n", 
               sps_id, sps->profile_idc, sps->level_idc,
               sps->poc_type,
               sps->ref_frame_count,
               sps->mb_width, sps->mb_height,
               sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
               sps->direct_8x8_inference_flag ? "8B8" : "",
               sps->vui_parameters_present_flag ? "VUI" : ""
               );
    }
    return 0;
}
3879

    
3880
/**
 * Parses a picture parameter set from the bit reader h->s.gb and stores the
 * result in h->pps_buffer[pps_id].
 *
 * Slice group (FMO) map details are not parsed; a message is printed when
 * more than one slice group is signalled.
 *
 * @param h decoder context; h->s.gb must already be initialized on the PPS
 *          payload
 * @return 0 on success, -1 if pps_id or sps_id is out of range or if a
 *         reference count exceeds 32
 */
static inline int decode_picture_parameter_set(H264Context *h){
    MpegEncContext * const s = &h->s;
    int pps_id= get_ue_golomb(&s->gb);
    int sps_id;
    PPS *pps;

    // both ids come straight from the bitstream; validate them before they
    // are used as (or stored for later use as) table indices
    if((unsigned)pps_id >= MAX_PPS_COUNT){
        fprintf(stderr, "pps_id out of range\n");
        return -1;
    }
    sps_id= get_ue_golomb(&s->gb);
    if((unsigned)sps_id >= MAX_SPS_COUNT){
        fprintf(stderr, "sps_id out of range\n");
        return -1;
    }

    pps= &h->pps_buffer[pps_id];

    pps->sps_id= sps_id;
    pps->cabac= get_bits1(&s->gb);
    pps->pic_order_present= get_bits1(&s->gb);
    pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
    if(pps->slice_group_count > 1 ){
        pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
fprintf(stderr, "FMO not supported\n");
        // The syntax elements listed below are NOT read yet; they are kept
        // as a reminder of what each map type would have to parse.
        switch(pps->mb_slice_group_map_type){
        case 0:
            /* for( i = 0; i <= num_slice_groups_minus1; i++ )
             *     run_length[ i ]                              ue(v) */
            break;
        case 2:
            /* for( i = 0; i < num_slice_groups_minus1; i++ ) {
             *     top_left_mb[ i ]                             ue(v)
             *     bottom_right_mb[ i ]                         ue(v)
             * } */
            break;
        case 3:
        case 4:
        case 5:
            /* slice_group_change_direction_flag                u(1)
             * slice_group_change_rate_minus1                   ue(v) */
            break;
        case 6:
            /* slice_group_id_cnt_minus1                        ue(v)
             * for( i = 0; i <= slice_group_id_cnt_minus1; i++ )
             *     slice_group_id[ i ]                          u(v) */
            break;
        default:
            break;
        }
    }
    pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
    pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
    if(pps->ref_count[0] > 32 || pps->ref_count[1] > 32){
        fprintf(stderr, "reference overflow (pps)\n");
        return -1;
    }

    pps->weighted_pred= get_bits1(&s->gb);
    pps->weighted_bipred_idc= get_bits(&s->gb, 2);
    // init_qp / init_qs are coded as signed offsets relative to 26
    pps->init_qp= get_se_golomb(&s->gb) + 26;
    pps->init_qs= get_se_golomb(&s->gb) + 26;
    pps->chroma_qp_index_offset= get_se_golomb(&s->gb);
    pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
    pps->constrained_intra_pred= get_bits1(&s->gb);
    pps->redundant_pic_cnt_present = get_bits1(&s->gb);

    // NOTE(review): the published H.264 syntax carries the frame cropping
    // fields in the SPS rather than the PPS - confirm which draft this
    // parser is meant to follow before relying on these values.
    pps->crop= get_bits1(&s->gb);
    if(pps->crop){
        pps->crop_left  = get_ue_golomb(&s->gb);
        pps->crop_right = get_ue_golomb(&s->gb);
        pps->crop_top   = get_ue_golomb(&s->gb);
        pps->crop_bottom= get_ue_golomb(&s->gb);
    }else{
        pps->crop_left  =
        pps->crop_right =
        pps->crop_top   =
        pps->crop_bottom= 0;
    }

    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
        printf("pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s crop:%d/%d/%d/%d\n",
               pps_id, pps->sps_id,
               pps->cabac ? "CABAC" : "CAVLC",
               pps->slice_group_count,
               pps->ref_count[0], pps->ref_count[1],
               pps->weighted_pred ? "weighted" : "",
               pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset,
               pps->deblocking_filter_parameters_present ? "LPAR" : "",
               pps->constrained_intra_pred ? "CONSTR" : "",
               pps->redundant_pic_cnt_present ? "REDU" : "",
               pps->crop_left, pps->crop_right,
               pps->crop_top, pps->crop_bottom
               );
    }

    return 0;
}
3972

    
3973
/**
3974
 * finds the end of the current frame in the bitstream.
3975
 * @return the position of the first byte of the next frame, or -1
3976
 */
3977
static int find_frame_end(MpegEncContext *s, uint8_t *buf, int buf_size){
3978
    ParseContext *pc= &s->parse_context;
3979
    int i;
3980
    uint32_t state;
3981
//printf("first %02X%02X%02X%02X\n", buf[0], buf[1],buf[2],buf[3]);
3982
//    mb_addr= pc->mb_addr - 1;
3983
    state= pc->state;
3984
    //FIXME this will fail with slices
3985
    for(i=0; i<buf_size; i++){
3986
        state= (state<<8) | buf[i];
3987
        if((state&0xFFFFFF1F) == 0x101 || (state&0xFFFFFF1F) == 0x102 || (state&0xFFFFFF1F) == 0x105){
3988
            if(pc->frame_start_found){
3989
                pc->state=-1; 
3990
                pc->frame_start_found= 0;
3991
                return i-3;
3992
            }
3993
            pc->frame_start_found= 1;
3994
        }
3995
    }
3996
    
3997
    pc->state= state;
3998
    return END_NOT_FOUND;
3999
}
4000

    
4001
/**
 * Splits buf at 00 00 01 start code prefixes and decodes every NAL unit
 * found, then finishes the current picture if one exists.
 *
 * @param h        decoder context
 * @param buf      input bytes
 * @param buf_size number of bytes in buf
 * @return the index in buf up to which data was consumed, or -1 if a NAL
 *         unit could not be unescaped or a slice header failed to decode
 */
static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx= s->avctx;
    int buf_index=0;

    for(;;){
        int consumed;
        int dst_length;
        int bit_length;
        uint8_t *ptr;

        // start code prefix search
        for(; buf_index + 3 < buf_size; buf_index++){
            // this should always succeed in the first iteration
            if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                break;
        }

        if(buf_index+3 >= buf_size) break;

        buf_index+=3;

        ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, buf_size - buf_index);
        if(ptr == NULL || dst_length < 0)
            return -1;

        // only look at the last payload byte if there is one; an empty
        // payload simply yields a bit length of 0
        if(dst_length > 0 && ptr[dst_length - 1] == 0) dst_length--;
        bit_length= dst_length ? 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1) : 0;

        if(s->avctx->debug&FF_DEBUG_STARTCODE){
            printf("NAL %d at %d length %d\n", h->nal_unit_type, buf_index, dst_length);
        }

        buf_index += consumed;

        if(h->nal_ref_idc < s->hurry_up)
            continue;

        switch(h->nal_unit_type){
        case NAL_IDR_SLICE:
            idr(h); //FIXME ensure we dont loose some frames if there is reordering
            /* fall through: an IDR slice is then decoded like any other slice */
        case NAL_SLICE:
            init_get_bits(&s->gb, ptr, bit_length);
            h->intra_gb_ptr=
            h->inter_gb_ptr= &s->gb;
            s->data_partitioning = 0;

            if(decode_slice_header(h) < 0) return -1;
            if(h->redundant_pic_count==0)
                decode_slice(h);
            break;
        case NAL_DPA:
            init_get_bits(&s->gb, ptr, bit_length);
            h->intra_gb_ptr=
            h->inter_gb_ptr= NULL;
            s->data_partitioning = 1;

            if(decode_slice_header(h) < 0) return -1;
            break;
        case NAL_DPB:
            init_get_bits(&h->intra_gb, ptr, bit_length);
            h->intra_gb_ptr= &h->intra_gb;
            break;
        case NAL_DPC:
            init_get_bits(&h->inter_gb, ptr, bit_length);
            h->inter_gb_ptr= &h->inter_gb;

            // the slice is only decoded once partitions A, B and C are all present
            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning)
                decode_slice(h);
            break;
        case NAL_SEI:
            break;
        case NAL_SPS:
            init_get_bits(&s->gb, ptr, bit_length);
            // NOTE(review): the return value is ignored here - confirm that
            // continuing after a failed SPS parse is intended
            decode_seq_parameter_set(h);

            if(s->flags& CODEC_FLAG_LOW_DELAY)
                s->low_delay=1;

            avctx->has_b_frames= !s->low_delay;
            break;
        case NAL_PPS:
            init_get_bits(&s->gb, ptr, bit_length);

            // NOTE(review): the return value is ignored here as well
            decode_picture_parameter_set(h);

            break;
        case NAL_PICTURE_DELIMITER:
            break;
        case NAL_FILTER_DATA:
            break;
        default:
            break;
        }

        //FIXME move after where irt is set
        s->current_picture.pict_type= s->pict_type;
        s->current_picture.key_frame= s->pict_type == I_TYPE;
    }

    if(!s->current_picture_ptr) return buf_index; //no frame

    h->prev_frame_num_offset= h->frame_num_offset;
    h->prev_frame_num= h->frame_num;
    if(s->current_picture_ptr->reference){
        h->prev_poc_msb= h->poc_msb;
        h->prev_poc_lsb= h->poc_lsb;
        execute_ref_pic_marking(h, h->mmco, h->mmco_index);
    }else
        assert(h->mmco_index==0);

    ff_er_frame_end(s);
    MPV_frame_end(s);

    return buf_index;
}
4119

    
4120
/**
4121
 * retunrs the number of bytes consumed for building the current frame
4122
 */
4123
static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
4124
    if(s->flags&CODEC_FLAG_TRUNCATED){
4125
        pos -= s->parse_context.last_index;
4126
        if(pos<0) pos=0; // FIXME remove (uneeded?)
4127
        
4128
        return pos;
4129
    }else{
4130
        if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
4131
        if(pos+10>buf_size) pos=buf_size; // oops ;)
4132

    
4133
        return pos;
4134
    }
4135
}
4136

    
4137
/**
 * Decodes the NAL units in buf and outputs the current picture.
 *
 * @param avctx     codec context; avctx->priv_data is the H264Context
 * @param data      output, written as an AVFrame
 * @param data_size set to sizeof(AVFrame) when a picture is output, else 0
 * @param buf       input bytes
 * @param buf_size  number of bytes in buf
 * @return the value of get_consumed_bytes() on success, 0 for an empty
 *         input, buf_size while a truncated frame is still incomplete,
 *         -1 on error
 */
static int decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             uint8_t *buf, int buf_size)
{
    H264Context *h = avctx->priv_data;
    MpegEncContext *s = &h->s;
    AVFrame *pict = data;
    int buf_index;

    s->flags= avctx->flags;

    *data_size = 0;

   /* no supplementary picture */
    if (buf_size == 0) {
        return 0;
    }

    if(s->flags&CODEC_FLAG_TRUNCATED){
        int next= find_frame_end(s, buf, buf_size);

        if( ff_combine_frame(s, next, &buf, &buf_size) < 0 )
            return buf_size;
    }

    if(s->avctx->extradata_size && s->picture_number==0){
        // decode_nal_units() returns a non-negative byte count on success
        // and -1 on failure, so only a negative result is an error
        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
            return -1;
    }

    buf_index=decode_nal_units(h, buf, buf_size);
    if(buf_index < 0)
        return -1;

    //FIXME do something with unavailable reference frames

    if(!s->current_picture_ptr){
        fprintf(stderr, "error, NO frame\n");
        return -1;
    }

    // The current picture is always output. Choosing between the current and
    // the last picture depending on B_TYPE / low_delay is not implemented.
    *pict= *(AVFrame*)&s->current_picture; //FIXME
    ff_print_debug_info(s, s->current_picture_ptr);
    assert(pict->data[0]);

    // Output is unconditional: suppressing the last picture after a seek is
    // not done, as an IDR picture should flush the buffers anyway.
    *data_size = sizeof(AVFrame);
    return get_consumed_bytes(s, buf_index, buf_size);
}
4205
#if 0
/**
 * Fills h->mb_avail[] for the current macroblock (currently compiled out).
 * Entries 0..2 describe the top-left, top and top-right neighbours and
 * entry 3 the left neighbour; a neighbour counts as available when it lies
 * inside the picture and its slice_table entry equals h->slice_num.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    const int top_xy= mb_xy - s->mb_stride;
    const int has_row_above= s->mb_y != 0;

    // the short-circuit on has_row_above keeps slice_table from being read
    // above the first macroblock row
    h->mb_avail[0]= has_row_above && s->mb_x                 && h->slice_table[top_xy - 1] == h->slice_num;
    h->mb_avail[1]= has_row_above                            && h->slice_table[top_xy    ] == h->slice_num;
    h->mb_avail[2]= has_row_above && s->mb_x+1 < s->mb_width && h->slice_table[top_xy + 1] == h->slice_num;

    h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
4224

    
4225
#if 0 //selftest
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test (currently compiled out). Exercises, in order:
 *  - unsigned exp golomb write/read round trip
 *  - signed exp golomb write/read round trip
 *  - 4x4 forward DCT followed by the inverse DCT, reporting the error
 *  - NAL encode/decode round trip on random data with a growing number of
 *    zero bytes
 * @return 0 on success, -1 if the NAL round trip fails
 */
int main(){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE, NULL, NULL);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE, NULL, NULL);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            // report the value that was actually written, not the loop index
            printf("missmatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
        }
        STOP_TIMER("get_se_golomb");
    }

    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;

        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }

        h264_add_idct_c(ref, block, 4);

        // accumulate squared and maximum reconstruction error
        for(j=0; j<16; j++){
            int diff= ABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
#if 0
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
#endif
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        // start from a buffer without any zero byte ...
        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        // ... then clear exactly `zeros` distinct positions
        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("missmatch\n");
            return -1;
        }
    }

    printf("Testing RBSP\n");


    return 0;
}
#endif
4397

    
4398

    
4399
/**
 * Codec close callback: calls free_tables() on the H264Context and
 * MPV_common_end() on its embedded MpegEncContext.
 * @return always 0
 */
static int decode_end(AVCodecContext *avctx)
{
    H264Context * const h = avctx->priv_data;

    free_tables(h); //FIXME cleanup init stuff perhaps
    MPV_common_end(&h->s);

    return 0;
}
4411

    
4412

    
4413
AVCodec h264_decoder = {
4414
    "h264",
4415
    CODEC_TYPE_VIDEO,
4416
    CODEC_ID_H264,
4417
    sizeof(H264Context),
4418
    decode_init,
4419
    NULL,
4420
    decode_end,
4421
    decode_frame,
4422
    /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED,
4423
};
4424

    
4425
#include "svq3.c"