Statistics
| Branch: | Revision:

ffmpeg / libavcodec / h264.c @ bc0219fd

History | View | Annotate | Download (150 KB)

1 0da71265 Michael Niedermayer
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 *
19
 */
20
 
21
/**
22
 * @file h264.c
23
 * H.264 / AVC / MPEG4 part10 codec.
24
 * @author Michael Niedermayer <michaelni@gmx.at>
25
 */
26
27
#include "common.h"
28
#include "dsputil.h"
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31
#include "h264data.h"
32
#include "golomb.h"
33
34
#undef NDEBUG
35
#include <assert.h>
36
37
/* Redirect two identifiers to deliberately awkward names for the rest of this file. */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra       mb_intra_isnt_initalized_see_mb_type

/* Indices of the DC blocks (NOTE(review): presumably indices into scan8[] — confirm). */
#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit counts for the static VLC tables declared further down in this file. */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/* Sizes of H264Context::sps_buffer and H264Context::pps_buffer. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* Size of H264Context::mmco. */
#define MAX_MMCO_COUNT 66
54
55
/**
56
 * Sequence parameter set
57
 */
58
typedef struct SPS{
59
    
60
    int profile_idc;
61
    int level_idc;
62
    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
63
    int poc_type;                      ///< pic_order_cnt_type
64
    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
65
    int delta_pic_order_always_zero_flag;
66
    int offset_for_non_ref_pic;
67
    int offset_for_top_to_bottom_field;
68
    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
69
    int ref_frame_count;               ///< num_ref_frames
70 a15e68de Michael Niedermayer
    int gaps_in_frame_num_allowed_flag;
71 0da71265 Michael Niedermayer
    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
72
    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
73
    int frame_mbs_only_flag;
74
    int mb_aff;                        ///<mb_adaptive_frame_field_flag
75
    int direct_8x8_inference_flag;
76 a15e68de Michael Niedermayer
    int crop;                   ///< frame_cropping_flag
77
    int crop_left;              ///< frame_cropping_rect_left_offset
78
    int crop_right;             ///< frame_cropping_rect_right_offset
79
    int crop_top;               ///< frame_cropping_rect_top_offset
80
    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
81 0da71265 Michael Niedermayer
    int vui_parameters_present_flag;
82 5ff85f1d Michael Niedermayer
    AVRational sar;
83 0da71265 Michael Niedermayer
    short offset_for_ref_frame[256]; //FIXME dyn aloc?
84
}SPS;
85
86
/**
 * Picture parameter set.
 * The trailing comments name the bitstream syntax element each field is
 * derived from.
 */
typedef struct PPS{
    int sps_id;
    int cabac;                                ///< entropy_coding_mode_flag
    int pic_order_present;                    ///< pic_order_present_flag
    int slice_group_count;                    ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];                         ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;                        ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                              ///< pic_init_qp_minus26 + 26
    int init_qs;                              ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred;               ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present;            ///< redundant_pic_cnt_present_flag
}PPS;
105
106
/**
 * Memory management control operation opcode.
 * NOTE(review): the numeric values look like they mirror the H.264
 * memory_management_control_operation codes 0..6 — confirm against the parser.
 */
typedef enum MMCOOpcode{
    MMCO_END          = 0,
    MMCO_SHORT2UNUSED = 1,
    MMCO_LONG2UNUSED  = 2,
    MMCO_SHORT2LONG   = 3,
    MMCO_SET_MAX_LONG = 4,
    MMCO_RESET        = 5,
    MMCO_LONG         = 6,
} MMCOOpcode;
118
119
/**
120
 * Memory management control operation.
121
 */
122
typedef struct MMCO{
123
    MMCOOpcode opcode;
124
    int short_frame_num;
125
    int long_index;
126
} MMCO;
127
128
/**
129
 * H264Context
130
 */
131
typedef struct H264Context{
132
    MpegEncContext s;
133
    int nal_ref_idc;        
134
    int nal_unit_type;
135
#define NAL_SLICE                1
136
#define NAL_DPA                        2
137
#define NAL_DPB                        3
138
#define NAL_DPC                        4
139
#define NAL_IDR_SLICE                5
140
#define NAL_SEI                        6
141
#define NAL_SPS                        7
142
#define NAL_PPS                        8
143
#define NAL_PICTURE_DELIMITER        9
144
#define NAL_FILTER_DATA                10
145
    uint8_t *rbsp_buffer;
146
    int rbsp_buffer_size;
147
148
    int chroma_qp; //QPc
149
150
    int prev_mb_skiped; //FIXME remove (IMHO not used)
151
152
    //prediction stuff
153
    int chroma_pred_mode;
154
    int intra16x16_pred_mode;
155
    
156
    int8_t intra4x4_pred_mode_cache[5*8];
157
    int8_t (*intra4x4_pred_mode)[8];
158
    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
159
    void (*pred8x8  [4+3])(uint8_t *src, int stride);
160
    void (*pred16x16[4+3])(uint8_t *src, int stride);
161
    unsigned int topleft_samples_available;
162
    unsigned int top_samples_available;
163
    unsigned int topright_samples_available;
164
    unsigned int left_samples_available;
165
166
    /**
167
     * non zero coeff count cache.
168
     * is 64 if not available.
169
     */
170
    uint8_t non_zero_count_cache[6*8];
171
    uint8_t (*non_zero_count)[16];
172
173
    /**
174
     * Motion vector cache.
175
     */
176
    int16_t mv_cache[2][5*8][2];
177
    int8_t ref_cache[2][5*8];
178
#define LIST_NOT_USED -1 //FIXME rename?
179
#define PART_NOT_AVAILABLE -2
180
    
181
    /**
182
     * is 1 if the specific list MV&references are set to 0,0,-2.
183
     */
184
    int mv_cache_clean[2];
185
186
    int block_offset[16+8];
187
    int chroma_subblock_offset[16]; //FIXME remove
188
    
189
    uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
190
    uint16_t *mb2b8_xy;
191
    int b_stride;
192
    int b8_stride;
193
194 8b82a956 Michael Niedermayer
    int halfpel_flag;
195
    int thirdpel_flag;
196
197 da3b9756 Mike Melanson
    int unknown_svq3_flag;
198
    int next_slice_index;
199
200 0da71265 Michael Niedermayer
    SPS sps_buffer[MAX_SPS_COUNT];
201
    SPS sps; ///< current sps
202
    
203
    PPS pps_buffer[MAX_PPS_COUNT];
204
    /**
205
     * current pps
206
     */
207
    PPS pps; //FIXME move tp Picture perhaps? (->no) do we need that?
208
209
    int slice_num;
210
    uint8_t *slice_table_base;
211
    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
212
    int slice_type;
213
    int slice_type_fixed;
214
    
215
    //interlacing specific flags
216
    int mb_field_decoding_flag;
217
    
218
    int sub_mb_type[4];
219
    
220
    //POC stuff
221
    int poc_lsb;
222
    int poc_msb;
223
    int delta_poc_bottom;
224
    int delta_poc[2];
225
    int frame_num;
226
    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
227
    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
228
    int frame_num_offset;         ///< for POC type 2
229
    int prev_frame_num_offset;    ///< for POC type 2
230
    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
231
232
    /**
233
     * frame_num for frames or 2*frame_num for field pics.
234
     */
235
    int curr_pic_num;
236
    
237
    /**
238
     * max_frame_num or 2*max_frame_num for field pics.
239
     */
240
    int max_pic_num;
241
242
    //Weighted pred stuff
243
    int luma_log2_weight_denom;
244
    int chroma_log2_weight_denom;
245
    int luma_weight[2][16];
246
    int luma_offset[2][16];
247
    int chroma_weight[2][16][2];
248
    int chroma_offset[2][16][2];
249
   
250
    //deblock
251
    int disable_deblocking_filter_idc;
252
    int slice_alpha_c0_offset_div2;
253
    int slice_beta_offset_div2;
254
     
255
    int redundant_pic_count;
256
    
257
    int direct_spatial_mv_pred;
258
259
    /**
260
     * num_ref_idx_l0/1_active_minus1 + 1
261
     */
262
    int ref_count[2];// FIXME split for AFF
263
    Picture *short_ref[16];
264
    Picture *long_ref[16];
265
    Picture default_ref_list[2][32];
266
    Picture ref_list[2][32]; //FIXME size?
267
    Picture field_ref_list[2][32]; //FIXME size?
268
    
269
    /**
270
     * memory management control operations buffer.
271
     */
272
    MMCO mmco[MAX_MMCO_COUNT];
273
    int mmco_index;
274
    
275
    int long_ref_count;  ///< number of actual long term references
276
    int short_ref_count; ///< number of actual short term references
277
    
278
    //data partitioning
279
    GetBitContext intra_gb;
280
    GetBitContext inter_gb;
281
    GetBitContext *intra_gb_ptr;
282
    GetBitContext *inter_gb_ptr;
283
    
284
    DCTELEM mb[16*24] __align8;
285
}H264Context;
286
287
/* File-scope VLC tables; the matching *_VLC_BITS constants are defined near the top of this file. */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];
static VLC run7_vlc;
295
296 8b82a956 Michael Niedermayer
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
297
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
298
299 377ec888 Michael Niedermayer
/**
 * Packs the low 16 bits of two values into one 32 bit word.
 * The half that holds a is chosen by WORDS_BIGENDIAN so that, assuming the
 * macro matches the target byte order, a lands in the first two bytes when
 * the word is stored to memory and b in the following two.
 * The shift is done on uint32_t: shifting a negative int left is undefined
 * behaviour, while the unsigned shift yields the intended bit pattern.
 * @param a value for the first 16 bit half (only its low 16 bits are used)
 * @param b value for the second 16 bit half (only its low 16 bits are used)
 * @return the packed 32 bit word
 */
static inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + ((uint32_t)a<<16);
#else
   return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
306
307 0da71265 Michael Niedermayer
/**
 * Fills a small rectangle of 1 byte or 4 byte elements with one value.
 * Only the width/height combinations handled in the body are implemented;
 * every other combination ends in assert(0).
 * @param vp address of the top left element
 * @param w width of the rectangle in elements, should be a constant
 * @param h height of the rectangle in rows, should be a constant
 * @param stride distance between the starts of two consecutive rows, in elements
 * @param val value to store: written unchanged when size is 4, multiplied by
 *            0x01..01 when size is 1 (so a value below 256 ends up in every byte)
 * @param size the size of val in bytes (1 or 4), should be a constant
 */
static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t * const base= (uint8_t*)vp;
    int row;

    assert(size==1 || size==4);

    // from here on w and stride are byte counts
    w      *= size;
    stride *= size;

//FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it
    if(w==2 && (h==2 || h==4)){
        // one 16 bit store per row
        const uint16_t v16= size==4 ? val : val*0x0101;
        for(row=0; row<h; row++)
            *(uint16_t*)(base + row*stride)= v16;
    }else if(w==4 && (h==1 || h==2 || h==4)){
        // one 32 bit store per row
        const uint32_t v32= size==4 ? val : val*0x01010101;
        for(row=0; row<h; row++)
            *(uint32_t*)(base + row*stride)= v32;
    }else if(w==8 && (h==1 || h==2)){
        // two 32 bit stores per row
        const uint32_t v32= size==4 ? val : val*0x01010101;
        for(row=0; row<h; row++){
            *(uint32_t*)(base + 0 + row*stride)= v32;
            *(uint32_t*)(base + 4 + row*stride)= v32;
        }
    }else if(w==8 && h==4){
        // one 64 bit store per row
        const uint64_t v64= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
        for(row=0; row<4; row++)
            *(uint64_t*)(base + row*stride)= v64;
    }else if(w==16 && (h==2 || h==4)){
        // two 64 bit stores per row
        const uint64_t v64= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
        for(row=0; row<h; row++){
            *(uint64_t*)(base + 0 + row*stride)= v64;
            *(uint64_t*)(base + 8 + row*stride)= v64;
        }
    }else
        assert(0);
}
369
370
/**
 * Loads the per macroblock caches from the neighbours (top left, top,
 * top right and left) of the current macroblock.
 *
 * A neighbour is only used when its slice_table entry equals h->slice_num;
 * otherwise its type is treated as 0 (not available).
 * What gets filled:
 *  - the intra sample availability masks (intra macroblocks only),
 *  - the intra4x4 prediction mode border (intra4x4 macroblocks only),
 *  - the non zero count border (always; 64 marks "not available"),
 *  - the motion vector / reference index border (inter macroblocks only).
 *
 * @param h       decoder context; s.mb_x / s.mb_y select the current macroblock
 * @param mb_type type of the current macroblock
 */
static inline void fill_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[4];
    int i;

    //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it 

    if(h->sps.mb_aff){
    //FIXME neighbour derivation for MBAFF is not implemented, everything below is a placeholder
        topleft_xy = 0; /* avoid warning */
        top_xy = 0; /* avoid warning */
        topright_xy = 0; /* avoid warning */
        // the left neighbour variables are read unconditionally further down,
        // so give them placeholders too instead of leaving them uninitialized
        left_xy[0]   = 0;
        left_xy[1]   = 0;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
    }else{
        topleft_xy = mb_xy-1 - s->mb_stride;
        top_xy     = mb_xy   - s->mb_stride;
        topright_xy= mb_xy+1 - s->mb_stride;
        left_xy[0]   = mb_xy-1;
        left_xy[1]   = mb_xy-1;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
    }

    // a neighbour from another slice counts as not available (type 0)
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        // start from the defaults, then clear bits for every neighbour that is
        // not intra and is either missing or excluded by constrained_intra_pred
        h->topleft_samples_available= 
        h->top_samples_available= 
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            // top border: copy the stored modes of an intra4x4 neighbour, otherwise
            // use 2 (neighbour usable but without 4x4 modes) or -1 (not usable)
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
                    pred= 2;
                else{
                    pred= -1;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            // left border: same rule, two cache rows per left neighbour half
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
                        pred= 2;
                    else{
                        pred= -1;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }


/*
0 . T T. T T T T 
1 L . .L . . . . 
2 L . .L . . . . 
3 . T TL . . . . 
4 L . .L . . . . 
5 L . .. . . . . 
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    }else{
        // 64 marks "not available"
        h->non_zero_count_cache[4+8*0]=      
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= 64;
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
        h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
        h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
    }else{
        h->non_zero_count_cache[3+8*1]= 
        h->non_zero_count_cache[3+8*2]= 
        h->non_zero_count_cache[0+8*1]= 
        h->non_zero_count_cache[0+8*4]= 64;
    }

    if(left_type[1]){
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
        h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
        h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
    }else{
        h->non_zero_count_cache[3+8*3]= 
        h->non_zero_count_cache[3+8*4]= 
        h->non_zero_count_cache[0+8*2]= 
        h->non_zero_count_cache[0+8*5]= 64;
    }

#if 1
    if(IS_INTER(mb_type)){
        int list;
        for(list=0; list<2; list++){
            if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue; //FIXME direct mode ...
            }
            h->mv_cache_clean[list]= 0;

            // For every neighbour position: copy MV + reference of an inter
            // neighbour, otherwise store a zero MV and LIST_NOT_USED (neighbour
            // exists but is not inter) or PART_NOT_AVAILABLE (no neighbour).
            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= 
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                // replicate the marker byte into all four reference entries at once
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            //FIXME unify cleanup or sth
            // left border: half i fills cache rows 2*i and 2*i+1 and is judged
            // by its own left_type[i]
            for(i=0; i<2; i++){
                const int cache_idx= scan8[0] - 1 + i*2*8;

                if(IS_INTER(left_type[i])){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx    ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx + 8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx    ]= 
                    h->ref_cache[list][cache_idx + 8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx    ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx + 8]= 0;
                    h->ref_cache[list][cache_idx    ]=
                    h->ref_cache[list][cache_idx + 8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            // these cache positions are always marked as not available
            h->ref_cache[list][scan8[5 ]+1] = 
            h->ref_cache[list][scan8[7 ]+1] = 
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewher else)
            h->ref_cache[list][scan8[4 ]] = 
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewher else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
        }
//FIXME

    }
#endif
}
619
620
/**
 * Stores the right column and bottom row of the intra4x4 prediction mode
 * cache into the per macroblock array, where fill_caches() reads them back
 * for later macroblocks.
 * Entries 0..3 take cache column 7 of rows 1..4, entries 4..6 take cache
 * columns 4..6 of row 4.
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int8_t * const dst= h->intra4x4_pred_mode[mb_xy];
    const int8_t * const cache= h->intra4x4_pred_mode_cache;
    int k;

    // right column, top to bottom
    for(k=0; k<4; k++)
        dst[k]= cache[7 + 8*(k+1)];

    // bottom row, without its rightmost entry (already stored as dst[3])
    for(k=0; k<3; k++)
        dst[4+k]= cache[4+k + 8*4];
}
632
633
/**
634
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
635
 */
636
static inline int check_intra4x4_pred_mode(H264Context *h){
637
    MpegEncContext * const s = &h->s;
638
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
639
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
640
    int i;
641
    
642
    if(!(h->top_samples_available&0x8000)){
643
        for(i=0; i<4; i++){
644
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
645
            if(status<0){
646
                fprintf(stderr, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
647
                return -1;
648
            } else if(status){
649
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
650
            }
651
        }
652
    }
653
    
654
    if(!(h->left_samples_available&0x8000)){
655
        for(i=0; i<4; i++){
656
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
657
            if(status<0){
658
                fprintf(stderr, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
659
                return -1;
660
            } else if(status){
661
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
662
            }
663
        }
664
    }
665
666
    return 0;
667
} //FIXME cleanup like next
668
669
/**
670
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
671
 */
672
static inline int check_intra_pred_mode(H264Context *h, int mode){
673
    MpegEncContext * const s = &h->s;
674
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
675
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
676
    
677
    if(!(h->top_samples_available&0x8000)){
678
        mode= top[ mode ];
679
        if(mode<0){
680
            fprintf(stderr, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
681
            return -1;
682
        }
683
    }
684
    
685
    if(!(h->left_samples_available&0x8000)){
686
        mode= left[ mode ];
687
        if(mode<0){
688
            fprintf(stderr, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
689
            return -1;
690
        } 
691
    }
692
693
    return mode;
694
}
695
696
/**
697
 * gets the predicted intra4x4 prediction mode.
698
 */
699
static inline int pred_intra_mode(H264Context *h, int n){
700
    const int index8= scan8[n];
701
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
702
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
703
    const int min= FFMIN(left, top);
704
705 95c26348 Michael Niedermayer
    tprintf("mode:%d %d min:%d\n", left ,top, min);
706 0da71265 Michael Niedermayer
707
    if(min<0) return DC_PRED;
708
    else      return min;
709
}
710
711
/**
 * Stores 13 border entries of the non zero count cache into the per
 * macroblock array, where fill_caches() reads them back for later
 * macroblocks.
 */
static inline void write_back_non_zero_count(H264Context *h){
    // cache position stored in non_zero_count[mb_xy][k], for k = 0..12
    static const uint8_t cache_pos[13]= {
        4+8*4, 5+8*4, 6+8*4, 7+8*4,
        7+8*3, 7+8*2, 7+8*1,
        1+8*2, 2+8*2, 2+8*1,
        1+8*5, 2+8*5, 2+8*4,
    };
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    uint8_t * const dst= h->non_zero_count[mb_xy];
    int k;

    for(k=0; k<13; k++)
        dst[k]= h->non_zero_count_cache[ cache_pos[k] ];
}
731
732
/**
733
 * gets the predicted number of non zero coefficients.
734
 * @param n block index
735
 */
736
static inline int pred_non_zero_count(H264Context *h, int n){
737
    const int index8= scan8[n];
738
    const int left= h->non_zero_count_cache[index8 - 1];
739
    const int top = h->non_zero_count_cache[index8 - 8];
740
    int i= left + top;
741
    
742
    if(i<64) i= (i+1)>>1;
743
744 95c26348 Michael Niedermayer
    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
745 0da71265 Michael Niedermayer
746
    return i&31;
747
}
748
749 1924f3ce Michael Niedermayer
/**
 * Selects the diagonal neighbour used for motion vector prediction.
 * The entry above and part_width to the right of cache position i is used;
 * if its reference is PART_NOT_AVAILABLE the entry above and one to the
 * left is used instead.
 * @param C receives a pointer to the selected motion vector
 * @param i position of the partition in ref_cache / mv_cache
 * @param part_width offset, in cache entries, from i to the top right entry
 * @return the ref_cache value that belongs to *C
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_pos= i - 8 + part_width;
    const int topleft_pos = i - 8 - 1;
    const int topright_ref= h->ref_cache[list][topright_pos];

    if(topright_ref == PART_NOT_AVAILABLE){
        tprintf("topright MV not available\n");

        *C= h->mv_cache[list][topleft_pos];
        return h->ref_cache[list][topleft_pos];
    }

    *C= h->mv_cache[list][topright_pos];
    return topright_ref;
}
762
763 0da71265 Michael Niedermayer
/**
764
 * gets the predicted MV.
765
 * @param n the block index
766
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
767
 * @param mx the x component of the predicted motion vector
768
 * @param my the y component of the predicted motion vector
769
 */
770
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
771
    const int index8= scan8[n];
772
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
773
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
774
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
775
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
776 1924f3ce Michael Niedermayer
    const int16_t * C;
777
    int diagonal_ref, match_count;
778
779 0da71265 Michael Niedermayer
    assert(part_width==1 || part_width==2 || part_width==4);
780 1924f3ce Michael Niedermayer
781 0da71265 Michael Niedermayer
/* mv_cache
782
  B . . A T T T T 
783
  U . . L . . , .
784
  U . . L . . . .
785
  U . . L . . , .
786
  . . . L . . . .
787
*/
788 1924f3ce Michael Niedermayer
789
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
790
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
791
    if(match_count > 1){ //most common
792
        *mx= mid_pred(A[0], B[0], C[0]);
793
        *my= mid_pred(A[1], B[1], C[1]);
794
    }else if(match_count==1){
795
        if(left_ref==ref){
796
            *mx= A[0];
797
            *my= A[1];        
798
        }else if(top_ref==ref){
799
            *mx= B[0];
800
            *my= B[1];        
801 0da71265 Michael Niedermayer
        }else{
802 1924f3ce Michael Niedermayer
            *mx= C[0];
803
            *my= C[1];        
804 0da71265 Michael Niedermayer
        }
805
    }else{
806 1924f3ce Michael Niedermayer
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
807 0da71265 Michael Niedermayer
            *mx= A[0];
808 1924f3ce Michael Niedermayer
            *my= A[1];        
809 0da71265 Michael Niedermayer
        }else{
810 1924f3ce Michael Niedermayer
            *mx= mid_pred(A[0], B[0], C[0]);
811
            *my= mid_pred(A[1], B[1], C[1]);
812 0da71265 Michael Niedermayer
        }
813
    }
814 1924f3ce Michael Niedermayer
        
815 af6e2fed Michael Niedermayer
    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
816 0da71265 Michael Niedermayer
}
817
818
/**
819
 * gets the directionally predicted 16x8 MV.
820
 * @param n the block index
821
 * @param mx the x component of the predicted motion vector
822
 * @param my the y component of the predicted motion vector
823
 */
824
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
825
    if(n==0){
826
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
827
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
828
829 af6e2fed Michael Niedermayer
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
830 0da71265 Michael Niedermayer
        
831
        if(top_ref == ref){
832
            *mx= B[0];
833
            *my= B[1];
834
            return;
835
        }
836
    }else{
837
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
838
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
839
        
840 af6e2fed Michael Niedermayer
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
841 0da71265 Michael Niedermayer
842
        if(left_ref == ref){
843
            *mx= A[0];
844
            *my= A[1];
845
            return;
846
        }
847
    }
848
849
    //RARE
850
    pred_motion(h, n, 4, list, ref, mx, my);
851
}
852
853
/**
854
 * gets the directionally predicted 8x16 MV.
855
 * @param n the block index
856
 * @param mx the x component of the predicted motion vector
857
 * @param my the y component of the predicted motion vector
858
 */
859
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
860
    if(n==0){
861
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
862
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
863
        
864 af6e2fed Michael Niedermayer
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
865 0da71265 Michael Niedermayer
866
        if(left_ref == ref){
867
            *mx= A[0];
868
            *my= A[1];
869
            return;
870
        }
871
    }else{
872 1924f3ce Michael Niedermayer
        const int16_t * C;
873
        int diagonal_ref;
874
875
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
876 0da71265 Michael Niedermayer
        
877 af6e2fed Michael Niedermayer
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
878 0da71265 Michael Niedermayer
879 1924f3ce Michael Niedermayer
        if(diagonal_ref == ref){ 
880 0da71265 Michael Niedermayer
            *mx= C[0];
881
            *my= C[1];
882
            return;
883
        }
884
    }
885
886
    //RARE
887
    pred_motion(h, n, 2, list, ref, mx, my);
888
}
889
890
/**
 * gets the motion vector used for a skipped macroblock in list 0.
 * The vector is (0,0) if the top or left neighbour is PART_NOT_AVAILABLE,
 * or if one of them has reference 0 together with a zero motion vector;
 * otherwise it is the pred_motion() prediction for reference 0.
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
    const int16_t * const top_mv = h->mv_cache[0][ scan8[0] - 8 ];
    const int16_t * const left_mv= h->mv_cache[0][ scan8[0] - 1 ];

    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    /* both components are compared individually instead of reading the
       int16_t pair through a uint32_t pointer */
    if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
       || (top_ref == 0  && top_mv[0]  == 0 && top_mv[1]  == 0)
       || (left_ref == 0 && left_mv[0] == 0 && left_mv[1] == 0)){

        *mx = *my = 0;
        return;
    }

    pred_motion(h, 0, 4, 0, 0, mx, my);
}
908
909
/**
 * Writes the motion data of the current macroblock from the caches back
 * into current_picture.
 * For each list that is in use, four rows of four motion vectors are copied
 * from mv_cache to motion_val and a 2x2 block of references from ref_cache
 * to ref_index. For a list the macroblock does not use (and which is not
 * 8x8 partitioned) the motion vectors are zeroed and the references are
 * set to LIST_NOT_USED.
 * @param mb_type macroblock type, tested with IS_8X8() and USES_LIST()
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    int list;

    for(list=0; list<2; list++){
        int y;
        if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
            if(1){ //FIXME skip or never read if mb_type doesnt use it
                for(y=0; y<4; y++){
                    *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
                    *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
                }
                /* the marker belongs into ref_index (indexed by b8_xy), not into motion_val */
                for(y=0; y<2; y++){
                    s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]=
                    s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
                }
            }
            continue; //FIXME direct mode ...
        }

        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        for(y=0; y<2; y++){
            s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
            s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
        }
    }
}
940
941
/**
942
 * Decodes a network abstraction layer unit.
943
 * @param consumed is the number of bytes used as input
944
 * @param length is the length of the array
945
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp ttailing?
946
 * @returns decoded bytes, might be src+1 if no escapes 
947
 */
948
static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
949
    int i, si, di;
950
    uint8_t *dst;
951
952
//    src[0]&0x80;                //forbidden bit
953
    h->nal_ref_idc= src[0]>>5;
954
    h->nal_unit_type= src[0]&0x1F;
955
956
    src++; length--;
957
#if 0    
958
    for(i=0; i<length; i++)
959
        printf("%2X ", src[i]);
960
#endif
961
    for(i=0; i+1<length; i+=2){
962
        if(src[i]) continue;
963
        if(i>0 && src[i-1]==0) i--;
964
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
965
            if(src[i+2]!=3){
966
                /* startcode, so we must be past the end */
967
                length=i;
968
            }
969
            break;
970
        }
971
    }
972
973
    if(i>=length-1){ //no escaped 0
974
        *dst_length= length;
975
        *consumed= length+1; //+1 for the header
976
        return src; 
977
    }
978
979
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
980
    dst= h->rbsp_buffer;
981
982
//printf("deoding esc\n");
983
    si=di=0;
984
    while(si<length){ 
985
        //remove escapes (very rare 1:2^22)
986
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
987
            if(src[si+2]==3){ //escape
988
                dst[di++]= 0;
989
                dst[di++]= 0;
990
                si+=3;
991
            }else //next start code
992
                break;
993
        }
994
995
        dst[di++]= src[si++];
996
    }
997
998
    *dst_length= di;
999
    *consumed= si + 1;//+1 for the header
1000
//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1001
    return dst;
1002
}
1003
1004
/**
1005
 * @param src the data which should be escaped
1006
 * @param dst the target buffer, dst+1 == src is allowed as a special case
1007
 * @param length the length of the src data
1008
 * @param dst_length the length of the dst array
1009
 * @returns length of escaped data in bytes or -1 if an error occured
1010
 */
1011
static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
1012
    int i, escape_count, si, di;
1013
    uint8_t *temp;
1014
    
1015
    assert(length>=0);
1016
    assert(dst_length>0);
1017
    
1018
    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
1019
1020
    if(length==0) return 1;
1021
1022
    escape_count= 0;
1023
    for(i=0; i<length; i+=2){
1024
        if(src[i]) continue;
1025
        if(i>0 && src[i-1]==0) 
1026
            i--;
1027
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1028
            escape_count++;
1029
            i+=2;
1030
        }
1031
    }
1032
    
1033
    if(escape_count==0){ 
1034
        if(dst+1 != src)
1035
            memcpy(dst+1, src, length);
1036
        return length + 1;
1037
    }
1038
    
1039
    if(length + escape_count + 1> dst_length)
1040
        return -1;
1041
1042
    //this should be damn rare (hopefully)
1043
1044
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
1045
    temp= h->rbsp_buffer;
1046
//printf("encoding esc\n");
1047
    
1048
    si= 0;
1049
    di= 0;
1050
    while(si < length){
1051
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1052
            temp[di++]= 0; si++;
1053
            temp[di++]= 0; si++;
1054
            temp[di++]= 3; 
1055
            temp[di++]= src[si++];
1056
        }
1057
        else
1058
            temp[di++]= src[si++];
1059
    }
1060
    memcpy(dst+1, temp, length+escape_count);
1061
    
1062
    assert(di == length+escape_count);
1063
    
1064
    return di + 1;
1065
}
1066
1067
/**
1068
 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1069
 */
1070
static void encode_rbsp_trailing(PutBitContext *pb){
1071
    int length;
1072
    put_bits(pb, 1, 1);
1073
    length= (-get_bit_count(pb))&7;
1074
    if(length) put_bits(pb, length, 0);
1075
}
1076
1077
/**
 * identifies the exact end of the bitstream
 * Looks for the lowest set bit in the byte src points to.
 * @return the length of the trailing (1 if bit 0 is set ... 8 if only bit 7
 *         is set), or 0 if damaged (no bit set)
 */
static int decode_rbsp_trailing(uint8_t *src){
    int bits= *src;
    int len= 1;

    tprintf("rbsp trailing %X\n", bits);

    while(len<9){
        if(bits&1)
            return len;
        bits>>=1;
        len++;
    }
    return 0;
}
1093
1094
/**
1095
 * idct tranforms the 16 dc values and dequantize them.
1096
 * @param qp quantization parameter
1097
 */
1098
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1099
    const int qmul= dequant_coeff[qp][0];
1100
#define stride 16
1101
    int i;
1102
    int temp[16]; //FIXME check if this is a good idea
1103
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1104
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1105
1106
//memset(block, 64, 2*256);
1107
//return;
1108
    for(i=0; i<4; i++){
1109
        const int offset= y_offset[i];
1110
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1111
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1112
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1113
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1114
1115
        temp[4*i+0]= z0+z3;
1116
        temp[4*i+1]= z1+z2;
1117
        temp[4*i+2]= z1-z2;
1118
        temp[4*i+3]= z0-z3;
1119
    }
1120
1121
    for(i=0; i<4; i++){
1122
        const int offset= x_offset[i];
1123
        const int z0= temp[4*0+i] + temp[4*2+i];
1124
        const int z1= temp[4*0+i] - temp[4*2+i];
1125
        const int z2= temp[4*1+i] - temp[4*3+i];
1126
        const int z3= temp[4*1+i] + temp[4*3+i];
1127
1128
        block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1129
        block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1130
        block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1131
        block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1132
    }
1133
}
1134
1135
/**
1136
 * dct tranforms the 16 dc values.
1137
 * @param qp quantization parameter ??? FIXME
1138
 */
1139
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1140
//    const int qmul= dequant_coeff[qp][0];
1141
    int i;
1142
    int temp[16]; //FIXME check if this is a good idea
1143
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1144
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1145
1146
    for(i=0; i<4; i++){
1147
        const int offset= y_offset[i];
1148
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1149
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1150
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1151
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1152
1153
        temp[4*i+0]= z0+z3;
1154
        temp[4*i+1]= z1+z2;
1155
        temp[4*i+2]= z1-z2;
1156
        temp[4*i+3]= z0-z3;
1157
    }
1158
1159
    for(i=0; i<4; i++){
1160
        const int offset= x_offset[i];
1161
        const int z0= temp[4*0+i] + temp[4*2+i];
1162
        const int z1= temp[4*0+i] - temp[4*2+i];
1163
        const int z2= temp[4*1+i] - temp[4*3+i];
1164
        const int z3= temp[4*1+i] + temp[4*3+i];
1165
1166
        block[stride*0 +offset]= (z0 + z3)>>1;
1167
        block[stride*2 +offset]= (z1 + z2)>>1;
1168
        block[stride*8 +offset]= (z1 - z2)>>1;
1169
        block[stride*10+offset]= (z0 - z3)>>1;
1170
    }
1171
}
1172
#undef xStride
1173
#undef stride
1174
1175
/**
 * 2x2 transform of the four chroma dc values followed by dequantization.
 * The values are read from and written back to block[0], block[16],
 * block[32] and block[48]; each result is multiplied by
 * dequant_coeff[qp][0] and shifted right by one.
 * @param qp quantization parameter
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
    const int qmul= dequant_coeff[qp][0];
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const p00= block;
    DCTELEM * const p01= block + xStride;
    DCTELEM * const p10= block + stride;
    DCTELEM * const p11= block + stride + xStride;
    const int top_sum = *p00 + *p01;
    const int top_diff= *p00 - *p01;
    const int bot_sum = *p10 + *p11;
    const int bot_diff= *p10 - *p11;

    *p00= ((top_sum  + bot_sum )*qmul)>>1;
    *p01= ((top_diff + bot_diff)*qmul)>>1;
    *p10= ((top_sum  - bot_sum )*qmul)>>1;
    *p11= ((top_diff - bot_diff)*qmul)>>1;
}
1196
1197
/**
 * 2x2 transform of the four chroma dc values, without any scaling.
 * The values are read from and written back to block[0], block[16],
 * block[32] and block[48].
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    DCTELEM * const p00= block;
    DCTELEM * const p01= block + xStride;
    DCTELEM * const p10= block + stride;
    DCTELEM * const p11= block + stride + xStride;
    const int top_sum = *p00 + *p01;
    const int top_diff= *p00 - *p01;
    const int bot_sum = *p10 + *p11;
    const int bot_diff= *p10 - *p11;

    *p00= top_sum  + bot_sum;
    *p01= top_diff + bot_diff;
    *p10= top_sum  - bot_sum;
    *p11= top_diff - bot_diff;
}
1217
1218
/**
1219
 * gets the chroma qp.
1220
 */
1221
static inline int get_chroma_qp(H264Context *h, int qscale){
1222
    
1223
    return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
1224
}
1225
1226
1227
/**
1228
 *
1229
 */
1230
static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
1231
    int i;
1232
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
1233
1234
    block[0] += 32;
1235
#if 1
1236
    for(i=0; i<4; i++){
1237
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1238
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1239
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1240
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1241
1242
        block[i + 4*0]= z0 + z3;
1243
        block[i + 4*1]= z1 + z2;
1244
        block[i + 4*2]= z1 - z2;
1245
        block[i + 4*3]= z0 - z3;
1246
    }
1247
1248
    for(i=0; i<4; i++){
1249
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1250
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1251
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1252
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1253
1254
        dst[0 + i*stride]= cm[ dst[0 + i*stride] + ((z0 + z3) >> 6) ];
1255
        dst[1 + i*stride]= cm[ dst[1 + i*stride] + ((z1 + z2) >> 6) ];
1256
        dst[2 + i*stride]= cm[ dst[2 + i*stride] + ((z1 - z2) >> 6) ];
1257
        dst[3 + i*stride]= cm[ dst[3 + i*stride] + ((z0 - z3) >> 6) ];
1258
    }
1259
#else
1260
    for(i=0; i<4; i++){
1261
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1262
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1263
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1264
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1265
1266
        block[0 + 4*i]= z0 + z3;
1267
        block[1 + 4*i]= z1 + z2;
1268
        block[2 + 4*i]= z1 - z2;
1269
        block[3 + 4*i]= z0 - z3;
1270
    }
1271
1272
    for(i=0; i<4; i++){
1273
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1274
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1275
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1276
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1277
1278
        dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
1279
        dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
1280
        dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
1281
        dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
1282
    }
1283
#endif
1284
}
1285
1286
/**
 * Forward transforms the difference of two 4x4 pixel areas.
 * The first pass transforms each row of src1-src2 into block[4*i .. 4*i+3],
 * the second pass transforms block in place along the other direction.
 * @param block receives the 16 coefficients
 * @param src1 top left pixel of the first area
 * @param src2 top left pixel of the area that is subtracted
 * @param stride distance in bytes between two rows of src1 and of src2
 */
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
    int i;
    //FIXME try int temp instead of block

    for(i=0; i<4; i++){
        const uint8_t * const p1= src1 + i*stride;
        const uint8_t * const p2= src2 + i*stride;
        DCTELEM * const row= block + 4*i;
        const int d0= p1[0] - p2[0];
        const int d1= p1[1] - p2[1];
        const int d2= p1[2] - p2[2];
        const int d3= p1[3] - p2[3];
        const int z0= d0 + d3;
        const int z3= d0 - d3;
        const int z1= d1 + d2;
        const int z2= d1 - d2;

        row[0]=   z0 +   z1;
        row[1]= 2*z3 +   z2;
        row[2]=   z0 -   z1;
        row[3]=   z3 - 2*z2;
    }

    for(i=0; i<4; i++){
        DCTELEM * const col= block + i;
        const int z0= col[0*4] + col[3*4];
        const int z3= col[0*4] - col[3*4];
        const int z1= col[1*4] + col[2*4];
        const int z2= col[1*4] - col[2*4];

        col[0*4]=   z0 +   z1;
        col[1*4]= 2*z3 +   z2;
        col[2*4]=   z0 -   z1;
        col[3*4]=   z3 - 2*z2;
    }
}
1318
1319
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, iam not sure, its very close
1320
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1321
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1322
    int i;
1323
    const int * const quant_table= quant_coeff[qscale];
1324
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1325
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1326
    const unsigned int threshold2= (threshold1<<1);
1327
    int last_non_zero;
1328
1329
    if(seperate_dc){
1330
        if(qscale<=18){
1331
            //avoid overflows
1332
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1333
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1334
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1335
1336
            int level= block[0]*quant_coeff[qscale+18][0];
1337
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1338
                if(level>0){
1339
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
1340
                    block[0]= level;
1341
                }else{
1342
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
1343
                    block[0]= -level;
1344
                }
1345
//                last_non_zero = i;
1346
            }else{
1347
                block[0]=0;
1348
            }
1349
        }else{
1350
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1351
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1352
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1353
1354
            int level= block[0]*quant_table[0];
1355
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1356
                if(level>0){
1357
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
1358
                    block[0]= level;
1359
                }else{
1360
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
1361
                    block[0]= -level;
1362
                }
1363
//                last_non_zero = i;
1364
            }else{
1365
                block[0]=0;
1366
            }
1367
        }
1368
        last_non_zero= 0;
1369
        i=1;
1370
    }else{
1371
        last_non_zero= -1;
1372
        i=0;
1373
    }
1374
1375
    for(; i<16; i++){
1376
        const int j= scantable[i];
1377
        int level= block[j]*quant_table[j];
1378
1379
//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1380
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1381
        if(((unsigned)(level+threshold1))>threshold2){
1382
            if(level>0){
1383
                level= (bias + level)>>QUANT_SHIFT;
1384
                block[j]= level;
1385
            }else{
1386
                level= (bias - level)>>QUANT_SHIFT;
1387
                block[j]= -level;
1388
            }
1389
            last_non_zero = i;
1390
        }else{
1391
            block[j]=0;
1392
        }
1393
    }
1394
1395
    return last_non_zero;
1396
}
1397
1398
/**
 * Vertical 4x4 prediction: copies the four pixels above the block into
 * each of its four rows.
 * @param src top left pixel of the block
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row= *(uint32_t*)(src-stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= top_row;
}
1405
1406
/**
 * Horizontal 4x4 prediction: fills each row of the block with the pixel
 * directly to its left.
 * @param src top left pixel of the block
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    for(y=0; y<4; y++){
        uint8_t * const row= src + y*stride;

        /* unsigned constant: pixel*0x01010101 overflows a signed int for pixel>=128 */
        *(uint32_t*)row= row[-1]*0x01010101U;
    }
}
1412
1413
/**
 * DC 4x4 prediction: fills the block with the rounded average of the four
 * pixels above and the four pixels to the left of it.
 * @param src top left pixel of the block
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    /* unsigned constant: dc*0x01010101 overflows a signed int for dc>=128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1422
1423
/**
 * DC 4x4 prediction from the left edge only: fills the block with the
 * rounded average of the four pixels to the left of it.
 * @param src top left pixel of the block
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    /* unsigned constant: dc*0x01010101 overflows a signed int for dc>=128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1431
1432
/**
 * DC 4x4 prediction from the top edge only: fills the block with the
 * rounded average of the four pixels above it.
 * @param src top left pixel of the block
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    /* unsigned constant: dc*0x01010101 overflows a signed int for dc>=128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1440
1441
/**
 * DC 4x4 prediction without any neighbours: fills the block with 128.
 * @param src top left pixel of the block
 * @param topright unused
 * @param stride distance in bytes between two rows
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t fill= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1447
1448
1449
/*
 * Edge loaders for the 4x4 predictors. Each macro expands to four complete
 * declarations (the last one includes its semicolon) and expects src and
 * stride, respectively topright, to be in scope.
 */

/* t4..t7: the four pixels topright points to */
#define LOAD_TOP_RIGHT_EDGE \
    const int t4= topright[0]; \
    const int t5= topright[1]; \
    const int t6= topright[2]; \
    const int t7= topright[3];

/* l0..l3: the pixels directly left of rows 0..3 of the block */
#define LOAD_LEFT_EDGE \
    const int l0= src[-1+0*stride]; \
    const int l1= src[-1+1*stride]; \
    const int l2= src[-1+2*stride]; \
    const int l3= src[-1+3*stride];

/* t0..t3: the pixels directly above columns 0..3 of the block */
#define LOAD_TOP_EDGE \
    const int t0= src[ 0-1*stride]; \
    const int t1= src[ 1-1*stride]; \
    const int t2= src[ 2-1*stride]; \
    const int t3= src[ 3-1*stride];
1466
1467
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
1468
    const int lt= src[-1-1*stride];
1469
    LOAD_TOP_EDGE
1470
    LOAD_LEFT_EDGE
1471
1472
    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 
1473
    src[0+2*stride]=
1474
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 
1475
    src[0+1*stride]=
1476
    src[1+2*stride]=
1477
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 
1478
    src[0+0*stride]=
1479
    src[1+1*stride]=
1480
    src[2+2*stride]=
1481
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 
1482
    src[1+0*stride]=
1483
    src[2+1*stride]=
1484
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
1485
    src[2+0*stride]=
1486
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1487
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1488 4cfbf61b Falk Hüffner
}
1489 0da71265 Michael Niedermayer
1490
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
1491
    LOAD_TOP_EDGE    
1492
    LOAD_TOP_RIGHT_EDGE    
1493
//    LOAD_LEFT_EDGE    
1494
1495
    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
1496
    src[1+0*stride]=
1497
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
1498
    src[2+0*stride]=
1499
    src[1+1*stride]=
1500
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
1501
    src[3+0*stride]=
1502
    src[2+1*stride]=
1503
    src[1+2*stride]=
1504
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
1505
    src[3+1*stride]=
1506
    src[2+2*stride]=
1507
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
1508
    src[3+2*stride]=
1509
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
1510
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
1511 4cfbf61b Falk Hüffner
}
1512 0da71265 Michael Niedermayer
1513
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
1514
    const int lt= src[-1-1*stride];
1515
    LOAD_TOP_EDGE    
1516
    LOAD_LEFT_EDGE    
1517
    const __attribute__((unused)) int unu= l3;
1518
1519
    src[0+0*stride]=
1520
    src[1+2*stride]=(lt + t0 + 1)>>1;
1521
    src[1+0*stride]=
1522
    src[2+2*stride]=(t0 + t1 + 1)>>1;
1523
    src[2+0*stride]=
1524
    src[3+2*stride]=(t1 + t2 + 1)>>1;
1525
    src[3+0*stride]=(t2 + t3 + 1)>>1;
1526
    src[0+1*stride]=
1527
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
1528
    src[1+1*stride]=
1529
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
1530
    src[2+1*stride]=
1531
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1532
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1533
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1534
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1535 4cfbf61b Falk Hüffner
}
1536 0da71265 Michael Niedermayer
1537
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
1538
    LOAD_TOP_EDGE    
1539
    LOAD_TOP_RIGHT_EDGE    
1540
    const __attribute__((unused)) int unu= t7;
1541
1542
    src[0+0*stride]=(t0 + t1 + 1)>>1;
1543
    src[1+0*stride]=
1544
    src[0+2*stride]=(t1 + t2 + 1)>>1;
1545
    src[2+0*stride]=
1546
    src[1+2*stride]=(t2 + t3 + 1)>>1;
1547
    src[3+0*stride]=
1548
    src[2+2*stride]=(t3 + t4+ 1)>>1;
1549
    src[3+2*stride]=(t4 + t5+ 1)>>1;
1550
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1551
    src[1+1*stride]=
1552
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
1553
    src[2+1*stride]=
1554
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
1555
    src[3+1*stride]=
1556
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
1557
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
1558 4cfbf61b Falk Hüffner
}
1559 0da71265 Michael Niedermayer
1560
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
1561
    LOAD_LEFT_EDGE    
1562
1563
    src[0+0*stride]=(l0 + l1 + 1)>>1;
1564
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1565
    src[2+0*stride]=
1566
    src[0+1*stride]=(l1 + l2 + 1)>>1;
1567
    src[3+0*stride]=
1568
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1569
    src[2+1*stride]=
1570
    src[0+2*stride]=(l2 + l3 + 1)>>1;
1571
    src[3+1*stride]=
1572
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
1573
    src[3+2*stride]=
1574
    src[1+3*stride]=
1575
    src[0+3*stride]=
1576
    src[2+2*stride]=
1577
    src[2+3*stride]=
1578
    src[3+3*stride]=l3;
1579 4cfbf61b Falk Hüffner
}
1580 0da71265 Michael Niedermayer
    
1581
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
1582
    const int lt= src[-1-1*stride];
1583
    LOAD_TOP_EDGE    
1584
    LOAD_LEFT_EDGE    
1585
    const __attribute__((unused)) int unu= t3;
1586
1587
    src[0+0*stride]=
1588
    src[2+1*stride]=(lt + l0 + 1)>>1;
1589
    src[1+0*stride]=
1590
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
1591
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
1592
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
1593
    src[0+1*stride]=
1594
    src[2+2*stride]=(l0 + l1 + 1)>>1;
1595
    src[1+1*stride]=
1596
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
1597
    src[0+2*stride]=
1598
    src[2+3*stride]=(l1 + l2+ 1)>>1;
1599
    src[1+2*stride]=
1600
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
1601
    src[0+3*stride]=(l2 + l3 + 1)>>1;
1602
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
1603 4cfbf61b Falk Hüffner
}
1604 0da71265 Michael Niedermayer
1605
/**
 * 16x16 intra prediction, vertical mode: copies the 16 samples of the row
 * directly above the block into each of the 16 rows of the block.
 * The row is moved as four 32-bit words, so src and src-stride are accessed
 * through uint32_t pointers.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];
    const uint32_t c= ((uint32_t*)(src-stride))[2];
    const uint32_t d= ((uint32_t*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
        ((uint32_t*)(src+i*stride))[2]= c;
        ((uint32_t*)(src+i*stride))[3]= d;
    }
}
1619
1620
/**
 * 16x16 intra prediction, horizontal mode: every row of the block is filled
 * with the sample immediately to the left of that row.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        /* unsigned multiply: replicating a byte >= 128 into 4 bytes does not fit in a signed int */
        const uint32_t v= src[-1+i*stride]*0x01010101U;

        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= v;
    }
}
1630
1631
/**
 * 16x16 intra prediction, DC mode: fills the block with the rounded mean of
 * the 16 left neighbours and the 16 top neighbours.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, dc=0;
    uint32_t dc4;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    /* 32 samples summed -> +16 rounds, >>5 divides; unsigned splat avoids signed overflow */
    dc4= 0x01010101U*((dc + 16)>>5);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc4;
    }
}
1651
1652
/**
 * 16x16 intra prediction, DC mode using only the left neighbours: fills the
 * block with the rounded mean of the 16 samples left of the block.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, dc=0;
    uint32_t dc4;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    /* 16 samples summed -> +8 rounds, >>4 divides; unsigned splat avoids signed overflow */
    dc4= 0x01010101U*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc4;
    }
}
1668
1669
/**
 * 16x16 intra prediction, DC mode using only the top neighbours: fills the
 * block with the rounded mean of the 16 samples above the block.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, dc=0;
    uint32_t dc4;

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }
    /* 16 samples summed -> +8 rounds, >>4 divides; unsigned splat avoids signed overflow */
    dc4= 0x01010101U*((dc + 8)>>4);

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= dc4;
    }
}
1684
1685
/**
 * 16x16 intra prediction, DC mode without any neighbours: fills the block
 * with the constant value 128.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]=
        ((uint32_t*)(src+i*stride))[2]=
        ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
    }
}
1695
1696 8b82a956 Michael Niedermayer
static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
1697 30f73fc7 Michael Niedermayer
  int i, j, k;
1698
  int a;
1699
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1700
  const uint8_t * const src0 = src+7-stride;
1701
  const uint8_t *src1 = src+8*stride-1;
1702
  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
1703
  int H = src0[1] - src0[-1];
1704
  int V = src1[0] - src2[ 0];
1705
  for(k=2; k<=8; ++k) {
1706
    src1 += stride; src2 -= stride;
1707
    H += k*(src0[k] - src0[-k]);
1708
    V += k*(src1[0] - src2[ 0]);
1709
  }
1710 8b82a956 Michael Niedermayer
  if(svq3){
1711
    H = ( 5*(H/4) ) / 16;
1712
    V = ( 5*(V/4) ) / 16;
1713 2e26c8d2 Mike Melanson
1714
    /* required for 100% accuracy */
1715
    i = H; H = V; V = i;
1716 8b82a956 Michael Niedermayer
  }else{
1717
    H = ( 5*H+32 ) >> 6;
1718
    V = ( 5*V+32 ) >> 6;
1719
  }
1720 30f73fc7 Michael Niedermayer
1721
  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
1722
  for(j=16; j>0; --j) {
1723
    int b = a;
1724
    a += V;
1725
    for(i=-16; i<0; i+=4) {
1726
      src[16+i] = cm[ (b    ) >> 5 ];
1727
      src[17+i] = cm[ (b+  H) >> 5 ];
1728
      src[18+i] = cm[ (b+2*H) >> 5 ];
1729
      src[19+i] = cm[ (b+3*H) >> 5 ];
1730
      b += 4*H;
1731 0da71265 Michael Niedermayer
    }
1732 30f73fc7 Michael Niedermayer
    src += stride;
1733
  }
1734 0da71265 Michael Niedermayer
}
1735
1736 8b82a956 Michael Niedermayer
/**
 * 16x16 intra prediction, plane mode, H.264 variant: forwards to
 * pred16x16_plane_compat_c() with the svq3 flag cleared.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    pred16x16_plane_compat_c(src, stride, 0);
}
1739
1740 0da71265 Michael Niedermayer
/**
 * 8x8 intra prediction, vertical mode: copies the 8 samples of the row
 * directly above the block into each of the 8 rows of the block.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    int i;
    const uint32_t a= ((uint32_t*)(src-stride))[0];
    const uint32_t b= ((uint32_t*)(src-stride))[1];

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= a;
        ((uint32_t*)(src+i*stride))[1]= b;
    }
}
1750
1751
/**
 * 8x8 intra prediction, horizontal mode: every row of the block is filled
 * with the sample immediately to the left of that row.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        /* unsigned multiply: replicating a byte >= 128 into 4 bytes does not fit in a signed int */
        const uint32_t v= src[-1+i*stride]*0x01010101U;

        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= v;
    }
}
1759
1760
/**
 * 8x8 intra prediction, DC mode without any neighbours: fills the block with
 * the constant value 128.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    int i;

    /* a single pass over all 8 rows; both halves receive the same constant */
    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
    }
}
1772
1773
/**
 * 8x8 intra prediction, DC mode using only the left neighbours.
 * The upper four rows are filled with the rounded mean of the four left
 * neighbours of rows 0..3, the lower four rows with that of rows 4..7.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc2;
    uint32_t upper, lower;

    dc0=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    /* unsigned splat avoids signed overflow for means >= 128 */
    upper= 0x01010101U*((dc0 + 2)>>2);
    lower= 0x01010101U*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= upper;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= lower;
    }
}
1794
1795
/**
 * 8x8 intra prediction, DC mode using only the top neighbours.
 * The left four columns are filled with the rounded mean of top neighbours
 * 0..3, the right four columns with that of top neighbours 4..7.
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1;
    uint32_t left_half, right_half;

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    /* unsigned splat avoids signed overflow for means >= 128 */
    left_half = 0x01010101U*((dc0 + 2)>>2);
    right_half= 0x01010101U*((dc1 + 2)>>2);

    /* all 8 rows get the same pair of words */
    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= left_half;
        ((uint32_t*)(src+i*stride))[1]= right_half;
    }
}
1816
1817
1818
/**
 * 8x8 intra prediction, DC mode with both edges available.
 * Each 4x4 quadrant gets its own DC value:
 *  - top-left:     mean of left neighbours 0..3 and top neighbours 0..3
 *  - top-right:    mean of top neighbours 4..7
 *  - bottom-left:  mean of left neighbours 4..7
 *  - bottom-right: mean of top neighbours 4..7 and left neighbours 4..7
 * @param src    top-left sample of the block to predict
 * @param stride offset between vertically adjacent samples
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int dc0, dc1, dc2;
    uint32_t q0, q1, q2, q3;

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    /* unsigned splats avoid signed overflow for means >= 128 */
    q3= 0x01010101U*((dc1 + dc2 + 4)>>3);
    q0= 0x01010101U*((dc0 + 4)>>3);
    q1= 0x01010101U*((dc1 + 2)>>2);
    q2= 0x01010101U*((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= q0;
        ((uint32_t*)(src+i*stride))[1]= q1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= q2;
        ((uint32_t*)(src+i*stride))[1]= q3;
    }
}
1842
1843
static void pred8x8_plane_c(uint8_t *src, int stride){
1844 30f73fc7 Michael Niedermayer
  int j, k;
1845
  int a;
1846
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1847
  const uint8_t * const src0 = src+3-stride;
1848
  const uint8_t *src1 = src+4*stride-1;
1849
  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
1850
  int H = src0[1] - src0[-1];
1851
  int V = src1[0] - src2[ 0];
1852
  for(k=2; k<=4; ++k) {
1853
    src1 += stride; src2 -= stride;
1854
    H += k*(src0[k] - src0[-k]);
1855
    V += k*(src1[0] - src2[ 0]);
1856
  }
1857
  H = ( 17*H+16 ) >> 5;
1858
  V = ( 17*V+16 ) >> 5;
1859
1860
  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
1861
  for(j=8; j>0; --j) {
1862
    int b = a;
1863
    a += V;
1864
    src[0] = cm[ (b    ) >> 5 ];
1865
    src[1] = cm[ (b+  H) >> 5 ];
1866
    src[2] = cm[ (b+2*H) >> 5 ];
1867
    src[3] = cm[ (b+3*H) >> 5 ];
1868
    src[4] = cm[ (b+4*H) >> 5 ];
1869
    src[5] = cm[ (b+5*H) >> 5 ];
1870
    src[6] = cm[ (b+6*H) >> 5 ];
1871
    src[7] = cm[ (b+7*H) >> 5 ];
1872
    src += stride;
1873
  }
1874 0da71265 Michael Niedermayer
}
1875
1876
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1877
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1878
                           int src_x_offset, int src_y_offset,
1879
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1880
    MpegEncContext * const s = &h->s;
1881
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1882
    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1883
    const int luma_xy= (mx&3) + ((my&3)<<2);
1884
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
1885
    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
1886
    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
1887
    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
1888
    int extra_height= extra_width;
1889
    int emu=0;
1890
    const int full_mx= mx>>2;
1891
    const int full_my= my>>2;
1892
    
1893
    assert(pic->data[0]);
1894
    
1895
    if(mx&7) extra_width -= 3;
1896
    if(my&7) extra_height -= 3;
1897
    
1898
    if(   full_mx < 0-extra_width 
1899
       || full_my < 0-extra_height 
1900
       || full_mx + 16/*FIXME*/ > s->width + extra_width 
1901
       || full_my + 16/*FIXME*/ > s->height + extra_height){
1902 c009df3f Michael Niedermayer
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
1903 0da71265 Michael Niedermayer
            src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
1904
        emu=1;
1905
    }
1906
    
1907
    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
1908
    if(!square){
1909
        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
1910
    }
1911
    
1912
    if(s->flags&CODEC_FLAG_GRAY) return;
1913
    
1914
    if(emu){
1915 c009df3f Michael Niedermayer
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1916 0da71265 Michael Niedermayer
            src_cb= s->edge_emu_buffer;
1917
    }
1918
    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
1919
1920
    if(emu){
1921 c009df3f Michael Niedermayer
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1922 0da71265 Michael Niedermayer
            src_cr= s->edge_emu_buffer;
1923
    }
1924
    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
1925
}
1926
1927
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1928
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1929
                           int x_offset, int y_offset,
1930
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1931
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1932
                           int list0, int list1){
1933
    MpegEncContext * const s = &h->s;
1934
    qpel_mc_func *qpix_op=  qpix_put;
1935
    h264_chroma_mc_func chroma_op= chroma_put;
1936
    
1937
    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
1938
    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
1939
    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
1940
    x_offset += 8*s->mb_x;
1941
    y_offset += 8*s->mb_y;
1942
    
1943
    if(list0){
1944 1924f3ce Michael Niedermayer
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1945 0da71265 Michael Niedermayer
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1946
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1947
                           qpix_op, chroma_op);
1948
1949
        qpix_op=  qpix_avg;
1950
        chroma_op= chroma_avg;
1951
    }
1952
1953
    if(list1){
1954 1924f3ce Michael Niedermayer
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1955 0da71265 Michael Niedermayer
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1956
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1957
                           qpix_op, chroma_op);
1958
    }
1959
}
1960
1961
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1962
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1963
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
1964
    MpegEncContext * const s = &h->s;
1965 7bc9090a Michael Niedermayer
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1966 0da71265 Michael Niedermayer
    const int mb_type= s->current_picture.mb_type[mb_xy];
1967
    
1968
    assert(IS_INTER(mb_type));
1969
    
1970
    if(IS_16X16(mb_type)){
1971
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1972
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1973
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1974
    }else if(IS_16X8(mb_type)){
1975
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1976
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1977
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1978
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1979
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1980
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1981
    }else if(IS_8X16(mb_type)){
1982
        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
1983
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1984
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1985
        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
1986
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1987
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1988
    }else{
1989
        int i;
1990
        
1991
        assert(IS_8X8(mb_type));
1992
1993
        for(i=0; i<4; i++){
1994
            const int sub_mb_type= h->sub_mb_type[i];
1995
            const int n= 4*i;
1996
            int x_offset= (i&1)<<2;
1997
            int y_offset= (i&2)<<1;
1998
1999
            if(IS_SUB_8X8(sub_mb_type)){
2000
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2001
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2002
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2003
            }else if(IS_SUB_8X4(sub_mb_type)){
2004
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2005
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2006
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2007
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2008
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2009
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2010
            }else if(IS_SUB_4X8(sub_mb_type)){
2011
                mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2012
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2013
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2014
                mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2015
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2016
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2017
            }else{
2018
                int j;
2019
                assert(IS_SUB_4X4(sub_mb_type));
2020
                for(j=0; j<4; j++){
2021
                    int sub_x_offset= x_offset + 2*(j&1);
2022
                    int sub_y_offset= y_offset +   (j&2);
2023
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2024
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2025
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2026
                }
2027
            }
2028
        }
2029
    }
2030
}
2031
2032
/**
 * Builds the VLC tables used by the decoder (coefficient tokens, total
 * zeros and run-before, plus their chroma DC variants).
 * A function-local static flag makes the tables get built only on the
 * first call; later calls return immediately.
 * @param h not used by this function
 */
static void decode_init_vlc(H264Context *h){
    static int done = 0;

    if (!done) {
        int i;
        done = 1;

        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                 &chroma_dc_coeff_token_len [0], 1, 1,
                 &chroma_dc_coeff_token_bits[0], 1, 1);

        for(i=0; i<4; i++){
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                     &coeff_token_len [i][0], 1, 1,
                     &coeff_token_bits[i][0], 1, 1);
        }

        for(i=0; i<3; i++){
            init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
                     &chroma_dc_total_zeros_bits[i][0], 1, 1);
        }
        for(i=0; i<15; i++){
            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
                     &total_zeros_len [i][0], 1, 1,
                     &total_zeros_bits[i][0], 1, 1);
        }

        for(i=0; i<6; i++){
            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
                     &run_len [i][0], 1, 1,
                     &run_bits[i][0], 1, 1);
        }
        /* the 7th run table has 16 entries and its own, larger, lookup size */
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                 &run_len [6][0], 1, 1,
                 &run_bits[6][0], 1, 1);
    }
}
2070
2071
/**
2072
 * Sets the intra prediction function pointers.
2073
 */
2074
static void init_pred_ptrs(H264Context *h){
2075
//    MpegEncContext * const s = &h->s;
2076
2077
    h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
2078
    h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
2079
    h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
2080
    h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2081
    h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2082
    h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
2083
    h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
2084
    h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
2085
    h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
2086
    h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
2087
    h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
2088
    h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
2089
2090
    h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
2091
    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
2092
    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
2093
    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
2094
    h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2095
    h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2096
    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2097
2098
    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
2099
    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
2100
    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
2101
    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
2102
    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2103
    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2104
    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2105
}
2106
2107
/**
 * Frees the per-macroblock tables allocated by alloc_tables().
 * h->slice_table points into slice_table_base, so it is reset to NULL once
 * the base allocation has been released.
 */
static void free_tables(H264Context *h){
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);
}
2116
2117
/**
2118
 * allocates tables.
2119
 * needs widzh/height
2120
 */
2121
static int alloc_tables(H264Context *h){
2122
    MpegEncContext * const s = &h->s;
2123 7bc9090a Michael Niedermayer
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2124 0da71265 Michael Niedermayer
    int x,y;
2125
2126
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2127
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2128
    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
2129
2130
    memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
2131 7bc9090a Michael Niedermayer
    h->slice_table= h->slice_table_base + s->mb_stride + 1;
2132 0da71265 Michael Niedermayer
2133
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint16_t));
2134
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
2135
    for(y=0; y<s->mb_height; y++){
2136
        for(x=0; x<s->mb_width; x++){
2137 7bc9090a Michael Niedermayer
            const int mb_xy= x + y*s->mb_stride;
2138 0da71265 Michael Niedermayer
            const int b_xy = 4*x + 4*y*h->b_stride;
2139
            const int b8_xy= 2*x + 2*y*h->b8_stride;
2140
        
2141
            h->mb2b_xy [mb_xy]= b_xy;
2142
            h->mb2b8_xy[mb_xy]= b8_xy;
2143
        }
2144
    }
2145
    
2146
    return 0;
2147
fail:
2148
    free_tables(h);
2149
    return -1;
2150
}
2151
2152
/**
 * Initialisation shared by users of H264Context: copies width, height and
 * the codec id from the AVCodecContext into the MpegEncContext, installs the
 * intra prediction function pointers and enables decoding.
 * s->avctx must already be set by the caller.
 */
static void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;

    s->width = s->avctx->width;
    s->height = s->avctx->height;
    s->codec_id= s->avctx->codec->id;

    init_pred_ptrs(h);

    s->decode=1; //FIXME
}
2163
2164
/**
 * Decoder init callback: links the private H264Context to avctx, runs the
 * common initialisation, sets the decoder defaults and builds the VLC tables.
 * @param avctx codec context whose priv_data is the H264Context
 * @return always 0
 */
static int decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    s->avctx = avctx;   /* must be set before common_init(), which reads s->avctx */
    common_init(h);

    s->out_format = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
    s->progressive_sequence=1;
//    s->decode_mb= ff_h263_decode_mb;
    s->low_delay= 1;
    avctx->pix_fmt= PIX_FMT_YUV420P;

    decode_init_vlc(h);

    return 0;
}
2184
2185
/**
 * Per-frame setup: starts the frame in the MPV and error-resilience layers,
 * resets the MMCO index and recomputes the block offset tables, which depend
 * on the line sizes of the current frame.
 */
static void frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    MPV_frame_start(s, s->avctx);
    ff_er_frame_start(s);
    h->mmco_index=0;

    assert(s->linesize && s->uvlinesize);

    /* scan8[i]-scan8[0] encodes the block position: low 3 bits = column, upper bits = row */
    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->chroma_subblock_offset[i]= 2*((scan8[i] - scan8[0])&7) + 2*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }
    /* entries 16..19 and 20..23 (Cb and Cr blocks) share the same offsets */
    for(i=0; i<4; i++){
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
}
2206
2207
/**
 * Reconstructs the current macroblock into s->current_picture.
 * Steps, in order:
 *  - compute the luma/chroma destination pointers (and line sizes, which are
 *    doubled for field macroblocks),
 *  - intra macroblocks: run chroma prediction, then either per-4x4 luma
 *    prediction with the residual added right after each block, or 16x16
 *    luma prediction followed by the luma DC dequant/transform,
 *  - inter H.264 macroblocks: run motion compensation via hl_motion(),
 *  - add the remaining luma residual (all macroblocks except intra 4x4),
 *  - unless CODEC_FLAG_GRAY is set, dequantize the chroma DC and add the
 *    chroma residual.
 * The H.264 or SVQ3 transform functions are selected by s->codec_id.
 */
static void hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;

    if(!s->decode)
        return;

    dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

    if (h->mb_field_decoding_flag) {
        linesize = s->linesize * 2;
        uvlinesize = s->uvlinesize * 2;
        if(mb_y&1){ //FIXME move out of this func?
            /* rewind to the second line of the macroblock row above; the chroma
               planes must be rewound in chroma lines (uvlinesize), not luma lines */
            dest_y -= s->linesize*15;
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
    } else {
        linesize = s->linesize;
        uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if(IS_INTRA(mb_type)){
        if(!(s->flags&CODEC_FLAG_GRAY)){
            h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
            h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
        }

        if(IS_INTRA4x4(mb_type)){
            if(!s->encoding){
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    uint8_t *topright= ptr + 4 - linesize;
                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    uint32_t tr;

                    if(!topright_avail){
                        /* no top-right block: replicate the last top sample 4 times
                           (unsigned multiply, the result does not fit in a signed int) */
                        tr= ptr[3 - linesize]*0x01010101U;
                        topright= (uint8_t*) &tr;
                    }

                    h->pred4x4[ dir ](ptr, topright, linesize);
                    /* the residual must be added before the next block is predicted from this one */
                    if(h->non_zero_count_cache[ scan8[i] ]){
                        if(s->codec_id == CODEC_ID_H264)
                            h264_add_idct_c(ptr, h->mb + i*16, linesize);
                        else
                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                    }
                }
            }
        }else{
            h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
            if(s->codec_id == CODEC_ID_H264)
                h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
            else
                svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
        }
    }else if(s->codec_id == CODEC_ID_H264){
        hl_motion(h, dest_y, dest_cb, dest_cr,
                  s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
                  s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
    }


    /* luma residual; intra 4x4 blocks already received theirs above */
    if(!IS_INTRA4x4(mb_type)){
        if(s->codec_id == CODEC_ID_H264){
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, linesize);
                }
            }
        }else{
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }

    if(!(s->flags&CODEC_FLAG_GRAY)){
        /* Cb DC coefficients start at block 16, Cr DC coefficients at block 20 */
        chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
        if(s->codec_id == CODEC_ID_H264){
            for(i=16; i<16+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
                }
            }
            for(i=20; i<20+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cr + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
                }
            }
        }else{
            for(i=16; i<16+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                }
            }
            for(i=20; i<20+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cr + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                }
            }
        }
    }
}
2334
2335
/**
 * Placeholder for CABAC macroblock decoding.
 * The body is empty: calling it has no effect.
 */
static void decode_mb_cabac(H264Context *h){
//    MpegEncContext * const s = &h->s;
}
2338
2339
/**
2340
 * fills the default_ref_list.
2341
 */
2342
static int fill_default_ref_list(H264Context *h){
2343
    MpegEncContext * const s = &h->s;
2344
    int i;
2345
    Picture sorted_short_ref[16];
2346
    
2347
    if(h->slice_type==B_TYPE){
2348
        int out_i;
2349
        int limit= -1;
2350
2351
        for(out_i=0; out_i<h->short_ref_count; out_i++){
2352
            int best_i=-1;
2353
            int best_poc=-1;
2354
2355
            for(i=0; i<h->short_ref_count; i++){
2356
                const int poc= h->short_ref[i]->poc;
2357
                if(poc > limit && poc < best_poc){
2358
                    best_poc= poc;
2359
                    best_i= i;
2360
                }
2361
            }
2362
            
2363
            assert(best_i != -1);
2364
            
2365
            limit= best_poc;
2366
            sorted_short_ref[out_i]= *h->short_ref[best_i];
2367
        }
2368
    }
2369
2370
    if(s->picture_structure == PICT_FRAME){
2371
        if(h->slice_type==B_TYPE){
2372
            const int current_poc= s->current_picture_ptr->poc;
2373
            int list;
2374
2375
            for(list=0; list<2; list++){
2376
                int index=0;
2377
2378
                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){
2379
                    const int i2= list ? h->short_ref_count - i - 1 : i;
2380
                    const int poc= sorted_short_ref[i2].poc;
2381
                    
2382
                    if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit
2383
2384
                    if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){
2385
                        h->default_ref_list[list][index  ]= sorted_short_ref[i2];
2386
                        h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num;
2387
                    }
2388
                }
2389
2390
                for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){
2391
                    if(h->long_ref[i]->reference != 3) continue;
2392
2393
                    h->default_ref_list[ list ][index  ]= *h->long_ref[i];
2394
                    h->default_ref_list[ list ][index++].pic_id= i;;
2395
                }
2396
                
2397
                if(h->long_ref_count > 1 && h->short_ref_count==0){
2398
                    Picture temp= h->default_ref_list[1][0];
2399
                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
2400
                    h->default_ref_list[1][0] = temp;
2401
                }
2402
2403
                if(index < h->ref_count[ list ])
2404
                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
2405
            }
2406
        }else{
2407
            int index=0;
2408
            for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
2409
                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
2410
                h->default_ref_list[0][index  ]= *h->short_ref[i];
2411
                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2412
            }
2413
            for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){
2414
                if(h->long_ref[i]->reference != 3) continue;
2415
                h->default_ref_list[0][index  ]= *h->long_ref[i];
2416
                h->default_ref_list[0][index++].pic_id= i;;
2417
            }
2418
            if(index < h->ref_count[0])
2419
                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2420
        }
2421
    }else{ //FIELD
2422
        if(h->slice_type==B_TYPE){
2423
        }else{
2424
            //FIXME second field balh
2425
        }
2426
    }
2427
    return 0;
2428
}
2429
2430
/**
 * Parses the reference picture list reordering syntax of a slice header and
 * builds h->ref_list[] from h->default_ref_list[].
 * Each list is first copied from the default list; if the bitstream signals
 * reordering, every command moves the addressed picture to the next position
 * and shifts the entries in between down by one. List 1 is only processed
 * for B slices, I and SI slices are skipped completely.
 *
 * @return 0 on success, -1 if the reordering syntax is invalid
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list;
    
    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
    
    for(list=0; list<2; list++){
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        if(get_bits1(&s->gb)){
            int pred= h->curr_pic_num;
            int index;

            for(index=0; ; index++){
                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                int pic_id;
                int i;
                
                /* the terminator has to be checked before the overflow test,
                   otherwise a stream which reorders all ref_count entries and
                   then ends the loop would be rejected */
                if(reordering_of_pic_nums_idc==3) 
                    break;
                
                if(index >= h->ref_count[list]){
                    fprintf(stderr, "reference count overflow\n");
                    return -1;
                }
                
                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        /* short term picture, addressed relative to the previous prediction */
                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;

                        if(abs_diff_pic_num >= h->max_pic_num){
                            fprintf(stderr, "abs_diff_pic_num overflow\n");
                            return -1;
                        }

                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else                                pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1;
                    
                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0)
                                break;
                        }
                    }else{
                        /* long term picture, addressed directly */
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx

                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1)
                                break;
                        }
                    }

                    if(i < index){
                        /* not found in the not yet reordered part of the list */
                        fprintf(stderr, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    }else if(i > index){
                        /* move entry i to position index, shifting the ones in between */
                        Picture tmp= h->ref_list[list][i];
                        for(; i>index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        }
                        h->ref_list[list][index]= tmp;
                    }
                }else{
                    fprintf(stderr, "illegal reordering_of_pic_nums_idc\n");
                    return -1;
                }
            }
        }

        if(h->slice_type!=B_TYPE) break;
    }
    return 0;    
}
2503
2504
/**
 * Parses the prediction weight table of a slice header.
 * Reads both log2 weight denominators and then, for every active reference
 * of list 0 (and of list 1 for B slices), the optional luma and chroma
 * weight/offset pairs. Entries whose flag is 0 are set to the inferred
 * defaults (weight 1<<denom, offset 0) so that no value from an earlier
 * slice survives.
 *
 * @return 0 on success, -1 if a denominator is out of range
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    int luma_def, chroma_def;
    
    h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
    h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);

    /* the denominators are used as shift counts below, legal values are 0..7 */
    if(   (unsigned)h->luma_log2_weight_denom   > 7
       || (unsigned)h->chroma_log2_weight_denom > 7){
        fprintf(stderr, "log2_weight_denom out of range\n");
        return -1;
    }
    luma_def  = 1<<h->luma_log2_weight_denom;
    chroma_def= 1<<h->chroma_log2_weight_denom;

    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;
            int j;
            
            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[list][i]= get_se_golomb(&s->gb);
                h->luma_offset[list][i]= get_se_golomb(&s->gb);
            }else{
                h->luma_weight[list][i]= luma_def;
                h->luma_offset[list][i]= 0;
            }

            chroma_weight_flag= get_bits1(&s->gb);
            for(j=0; j<2; j++){
                if(chroma_weight_flag){
                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
                }else{
                    h->chroma_weight[list][i][j]= chroma_def;
                    h->chroma_offset[list][i][j]= 0;
                }
            }
        }
        if(h->slice_type != B_TYPE) break;
    }
    return 0;
}
2534
2535
/**
2536
 * instantaneos decoder refresh.
2537
 */
2538
static void idr(H264Context *h){
2539
    int i;
2540
2541
    for(i=0; i<h->long_ref_count; i++){
2542
        h->long_ref[i]->reference=0;
2543
        h->long_ref[i]= NULL;
2544
    }
2545
    h->long_ref_count=0;
2546
2547
    for(i=0; i<h->short_ref_count; i++){
2548
        h->short_ref[i]->reference=0;
2549
        h->short_ref[i]= NULL;
2550
    }
2551
    h->short_ref_count=0;
2552
}
2553
2554
/**
2555
 *
2556
 * @return the removed picture or NULL if an error occures
2557
 */
2558
static Picture * remove_short(H264Context *h, int frame_num){
2559 1924f3ce Michael Niedermayer
    MpegEncContext * const s = &h->s;
2560 0da71265 Michael Niedermayer
    int i;
2561
    
2562 1924f3ce Michael Niedermayer
    if(s->avctx->debug&FF_DEBUG_MMCO)
2563
        printf("remove short %d count %d\n", frame_num, h->short_ref_count);
2564
    
2565 0da71265 Michael Niedermayer
    for(i=0; i<h->short_ref_count; i++){
2566
        Picture *pic= h->short_ref[i];
2567 1924f3ce Michael Niedermayer
        if(s->avctx->debug&FF_DEBUG_MMCO)
2568 3db320ea Falk Hüffner
            printf("%d %d %p\n", i, pic->frame_num, pic);
2569 0da71265 Michael Niedermayer
        if(pic->frame_num == frame_num){
2570
            h->short_ref[i]= NULL;
2571
            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
2572
            h->short_ref_count--;
2573
            return pic;
2574
        }
2575
    }
2576
    return NULL;
2577
}
2578
2579
/**
2580
 *
2581
 * @return the removed picture or NULL if an error occures
2582
 */
2583
static Picture * remove_long(H264Context *h, int i){
2584
    Picture *pic;
2585
2586
    if(i >= h->long_ref_count) return NULL;
2587
    pic= h->long_ref[i];
2588
    if(pic==NULL) return NULL;
2589
    
2590
    h->long_ref[i]= NULL;
2591
    memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*));
2592
    h->long_ref_count--;
2593
2594
    return pic;
2595
}
2596
2597
/**
2598
 * Executes the reference picture marking (memory management control operations).
2599
 */
2600
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
2601
    MpegEncContext * const s = &h->s;
2602
    int i;
2603
    int current_is_long=0;
2604
    Picture *pic;
2605
    
2606
    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
2607
        printf("no mmco here\n");
2608
        
2609
    for(i=0; i<mmco_count; i++){
2610
        if(s->avctx->debug&FF_DEBUG_MMCO)
2611
            printf("mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
2612
2613
        switch(mmco[i].opcode){
2614
        case MMCO_SHORT2UNUSED:
2615
            pic= remove_short(h, mmco[i].short_frame_num);
2616
            if(pic==NULL) return -1;
2617
            pic->reference= 0;
2618
            break;
2619
        case MMCO_SHORT2LONG:
2620
            pic= remove_long(h, mmco[i].long_index);
2621
            if(pic) pic->reference=0;
2622
            
2623
            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
2624
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2625
            break;
2626
        case MMCO_LONG2UNUSED:
2627
            pic= remove_long(h, mmco[i].long_index);
2628
            if(pic==NULL) return -1;
2629
            pic->reference= 0;
2630
            break;
2631
        case MMCO_LONG:
2632
            pic= remove_long(h, mmco[i].long_index);
2633
            if(pic) pic->reference=0;
2634
            
2635
            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
2636
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2637
            h->long_ref_count++;
2638
            
2639
            current_is_long=1;
2640
            break;
2641
        case MMCO_SET_MAX_LONG:
2642
            assert(mmco[i].long_index <= 16);
2643
            while(mmco[i].long_index < h->long_ref_count){
2644
                pic= remove_long(h, mmco[i].long_index);
2645
                pic->reference=0;
2646
            }
2647
            while(mmco[i].long_index > h->long_ref_count){
2648
                h->long_ref[ h->long_ref_count++ ]= NULL;
2649
            }
2650
            break;
2651
        case MMCO_RESET:
2652
            while(h->short_ref_count){
2653
                pic= remove_short(h, h->short_ref[0]->frame_num);
2654
                pic->reference=0;
2655
            }
2656
            while(h->long_ref_count){
2657
                pic= remove_long(h, h->long_ref_count-1);
2658
                pic->reference=0;
2659
            }
2660
            break;
2661
        default: assert(0);
2662
        }
2663
    }
2664
    
2665
    if(!current_is_long){
2666
        pic= remove_short(h, s->current_picture_ptr->frame_num);
2667
        if(pic){
2668
            pic->reference=0;
2669
            fprintf(stderr, "illegal short term buffer state detected\n");
2670
        }
2671
        
2672
        if(h->short_ref_count)
2673 1924f3ce Michael Niedermayer
            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
2674
2675
        h->short_ref[0]= s->current_picture_ptr;
2676 0da71265 Michael Niedermayer
        h->short_ref[0]->long_ref=0;
2677
        h->short_ref_count++;
2678
    }
2679
    
2680
    return 0; 
2681
}
2682
2683
/**
 * Parses the reference picture marking syntax of a slice header into
 * h->mmco[] / h->mmco_index, nothing is executed here.
 *  - IDR slices: reads the 2 flags, the 2nd one selects whether the picture
 *    becomes a long term reference (a single MMCO_LONG).
 *  - adaptive marking: reads operations until the end opcode (0).
 *  - otherwise (sliding window): if all ref_frame_count references are in use,
 *    an operation removing the last short term reference is generated.
 *
 * @return 0 on success, -1 if the bitstream contains an illegal operation
 */
static int decode_ref_pic_marking(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    
    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
        s->broken_link= get_bits1(&s->gb) -1;
        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
        if(h->mmco[0].long_index == -1)
            h->mmco_index= 0;
        else{
            h->mmco[0].opcode= MMCO_LONG;
            h->mmco_index= 1;
        } 
    }else{
        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
            /* every slice header carries the complete list, so it is always
               stored from the start instead of behind the previous one */
            for(i= 0; i<MAX_MMCO_COUNT; i++) { 
                MMCOOpcode opcode= get_ue_golomb(&s->gb);

                h->mmco[i].opcode= opcode;
                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
                }
                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
                    if(h->mmco[i].long_index >= 16){
                        fprintf(stderr, "illegal long ref in memory management control operation %d\n", opcode);
                        return -1;
                    }
                }
                    
                if(opcode > MMCO_LONG){
                    fprintf(stderr, "illegal memory management control operation %d\n", opcode);
                    return -1;
                }

                /* memory_management_control_operation 0 ends the list, it is
                   not counted as an operation */
                if(opcode == 0)
                    break;
            }
            h->mmco_index= i;
        }else{
            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);

            /* sliding window; without any short term reference there is
               nothing which could be removed */
            if(   h->short_ref_count > 0
               && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
                h->mmco_index= 1;
            }else
                h->mmco_index= 0;
        }
    }
    
    return 0; 
}
2737
2738
/**
 * Computes the picture order count of the current picture.
 * Updates h->frame_num_offset (and h->poc_msb for poc_type 0), derives the
 * top and bottom field poc according to h->sps.poc_type and stores them in
 * s->current_picture_ptr->field_poc[] for the field(s) being decoded.
 * For frame pictures the smaller of the 2 becomes the poc of the picture.
 *
 * @return always 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int max_frame_num= 1<<h->sps.log2_max_frame_num;
    const int is_frame= s->picture_structure == PICT_FRAME;
    int field_poc[2];

    /* frame_num wrapped around since the previous picture -> advance the offset */
    if(h->nal_unit_type == NAL_IDR_SLICE)
        h->frame_num_offset= 0;
    else if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
    else
        h->frame_num_offset= h->prev_frame_num_offset;

    switch(h->sps.poc_type){
    case 0:{
        /* poc = msb + lsb, the msb follows the wraparounds of the coded lsb */
        const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb + max_poc_lsb;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
            h->poc_msb = h->prev_poc_msb - max_poc_lsb;
        else
            h->poc_msb = h->prev_poc_msb;

        field_poc[1] = h->poc_msb + h->poc_lsb;
        field_poc[0] = field_poc[1];
        if(is_frame) 
            field_poc[1] += h->delta_poc_bottom;
        break;
    }
    case 1:{
        /* poc is predicted from frame_num with the cycle of offsets from the sps */
        int abs_frame_num= 0;
        int delta_per_cycle= 0;
        int expectedpoc= 0;
        int k;

        if(h->sps.poc_cycle_length != 0)
            abs_frame_num = h->frame_num_offset + h->frame_num;

        if(h->nal_ref_idc==0 && abs_frame_num > 0)
            abs_frame_num--;
            
        for(k=0; k < h->sps.poc_cycle_length; k++)
            delta_per_cycle += h->sps.offset_for_ref_frame[ k ]; //FIXME integrate during sps parse

        if(abs_frame_num > 0){
            const int cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
            const int in_cycle  = (abs_frame_num - 1) % h->sps.poc_cycle_length;

            expectedpoc = cycle_cnt * delta_per_cycle;
            for(k = 0; k <= in_cycle; k++)
                expectedpoc += h->sps.offset_for_ref_frame[ k ];
        }

        if(h->nal_ref_idc == 0) 
            expectedpoc += h->sps.offset_for_non_ref_pic;
        
        field_poc[0] = expectedpoc + h->delta_poc[0];
        field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;

        if(is_frame)
            field_poc[1] += h->delta_poc[1];
        break;
    }
    default:{
        /* poc follows directly from frame_num, non reference pictures get the odd values */
        int poc= 0;

        if(h->nal_unit_type != NAL_IDR_SLICE){
            poc= 2*(h->frame_num_offset + h->frame_num);
            if(!h->nal_ref_idc)
                poc -= 1;
        }
        field_poc[0]= poc;
        field_poc[1]= poc;
        break;
    }
    }
    
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        s->current_picture_ptr->field_poc[0]= field_poc[0];
    if(s->picture_structure != PICT_TOP_FIELD)
        s->current_picture_ptr->field_poc[1]= field_poc[1];
    if(is_frame) // FIXME field pix?
        s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);

    return 0;
}
2821
2822
/**
2823
 * decodes a slice header.
2824
 * this will allso call MPV_common_init() and frame_start() as needed
2825
 */
2826
static int decode_slice_header(H264Context *h){
2827
    MpegEncContext * const s = &h->s;
2828
    int first_mb_in_slice, pps_id;
2829
    int num_ref_idx_active_override_flag;
2830
    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
2831
2832
    s->current_picture.reference= h->nal_ref_idc != 0;
2833
2834
    first_mb_in_slice= get_ue_golomb(&s->gb);
2835
2836
    h->slice_type= get_ue_golomb(&s->gb);
2837
    if(h->slice_type > 9){
2838
        fprintf(stderr, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
2839
    }
2840
    if(h->slice_type > 4){
2841
        h->slice_type -= 5;
2842
        h->slice_type_fixed=1;
2843
    }else
2844
        h->slice_type_fixed=0;
2845
    
2846
    h->slice_type= slice_type_map[ h->slice_type ];
2847
    
2848
    s->pict_type= h->slice_type; // to make a few old func happy, its wrong though
2849
        
2850
    pps_id= get_ue_golomb(&s->gb);
2851
    if(pps_id>255){
2852
        fprintf(stderr, "pps_id out of range\n");
2853
        return -1;
2854
    }
2855
    h->pps= h->pps_buffer[pps_id];
2856 8b92b792 Michael Niedermayer
    if(h->pps.slice_group_count == 0){
2857
        fprintf(stderr, "non existing PPS referenced\n");
2858
        return -1;
2859
    }
2860
2861 0da71265 Michael Niedermayer
    h->sps= h->sps_buffer[ h->pps.sps_id ];
2862 8b92b792 Michael Niedermayer
    if(h->sps.log2_max_frame_num == 0){
2863
        fprintf(stderr, "non existing SPS referenced\n");
2864
        return -1;
2865
    }
2866 0da71265 Michael Niedermayer
    
2867
    s->mb_width= h->sps.mb_width;
2868
    s->mb_height= h->sps.mb_height;
2869
    
2870
    h->b_stride=  s->mb_width*4;
2871
    h->b8_stride= s->mb_width*2;
2872
2873
    s->mb_x = first_mb_in_slice % s->mb_width;
2874
    s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
2875
    
2876 a15e68de Michael Niedermayer
    s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
2877 0da71265 Michael Niedermayer
    if(h->sps.frame_mbs_only_flag)
2878 a15e68de Michael Niedermayer
        s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
2879 0da71265 Michael Niedermayer
    else
2880 a15e68de Michael Niedermayer
        s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
2881 0da71265 Michael Niedermayer
    
2882
    if (s->context_initialized 
2883 5ff85f1d Michael Niedermayer
        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
2884 0da71265 Michael Niedermayer
        free_tables(h);
2885
        MPV_common_end(s);
2886
    }
2887
    if (!s->context_initialized) {
2888
        if (MPV_common_init(s) < 0)
2889
            return -1;
2890
2891
        alloc_tables(h);
2892
2893
        s->avctx->width = s->width;
2894
        s->avctx->height = s->height;
2895 5ff85f1d Michael Niedermayer
        s->avctx->sample_aspect_ratio= h->sps.sar;
2896 0da71265 Michael Niedermayer
    }
2897
2898
    if(first_mb_in_slice == 0){
2899
        frame_start(h);
2900
    }
2901
2902 1924f3ce Michael Niedermayer
    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
2903 0da71265 Michael Niedermayer
    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
2904
2905
    if(h->sps.frame_mbs_only_flag){
2906
        s->picture_structure= PICT_FRAME;
2907
    }else{
2908
        if(get_bits1(&s->gb)) //field_pic_flag
2909
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
2910
        else
2911
            s->picture_structure= PICT_FRAME;
2912
    }
2913
2914
    if(s->picture_structure==PICT_FRAME){
2915
        h->curr_pic_num=   h->frame_num;
2916
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
2917
    }else{
2918
        h->curr_pic_num= 2*h->frame_num;
2919
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
2920
    }
2921
        
2922
    if(h->nal_unit_type == NAL_IDR_SLICE){
2923 1df1df0b Fabrice Bellard
        get_ue_golomb(&s->gb); /* idr_pic_id */
2924 0da71265 Michael Niedermayer
    }
2925
   
2926
    if(h->sps.poc_type==0){
2927
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
2928
        
2929
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
2930
            h->delta_poc_bottom= get_se_golomb(&s->gb);
2931
        }
2932
    }
2933
    
2934
    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
2935
        h->delta_poc[0]= get_se_golomb(&s->gb);
2936
        
2937
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
2938
            h->delta_poc[1]= get_se_golomb(&s->gb);
2939
    }
2940
    
2941
    init_poc(h);
2942
    
2943
    if(h->pps.redundant_pic_cnt_present){
2944
        h->redundant_pic_count= get_ue_golomb(&s->gb);
2945
    }
2946
2947
    //set defaults, might be overriden a few line later
2948
    h->ref_count[0]= h->pps.ref_count[0];
2949
    h->ref_count[1]= h->pps.ref_count[1];
2950
2951
    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
2952
        if(h->slice_type == B_TYPE){
2953
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
2954
        }
2955
        num_ref_idx_active_override_flag= get_bits1(&s->gb);
2956
    
2957
        if(num_ref_idx_active_override_flag){
2958
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
2959
            if(h->slice_type==B_TYPE)
2960
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2961
2962
            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
2963
                fprintf(stderr, "reference overflow\n");
2964
                return -1;
2965
            }
2966
        }
2967
    }
2968
2969
    if(first_mb_in_slice == 0){
2970
        fill_default_ref_list(h);
2971
    }
2972
2973
    decode_ref_pic_list_reordering(h);
2974
2975
    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) 
2976
       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
2977
        pred_weight_table(h);
2978
    
2979
    if(s->current_picture.reference)
2980
        decode_ref_pic_marking(h);
2981
    //FIXME CABAC stuff
2982
2983
    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb); //slice_qp_delta
2984
    //FIXME qscale / qp ... stuff
2985
    if(h->slice_type == SP_TYPE){
2986 1df1df0b Fabrice Bellard
        get_bits1(&s->gb); /* sp_for_switch_flag */
2987 0da71265 Michael Niedermayer
    }
2988
    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
2989 1df1df0b Fabrice Bellard
        get_se_golomb(&s->gb); /* slice_qs_delta */
2990 0da71265 Michael Niedermayer
    }
2991
2992
    if( h->pps.deblocking_filter_parameters_present ) {
2993
        h->disable_deblocking_filter_idc= get_ue_golomb(&s->gb);
2994
        if( h->disable_deblocking_filter_idc  !=  1 ) {
2995
            h->slice_alpha_c0_offset_div2= get_se_golomb(&s->gb);
2996
            h->slice_beta_offset_div2= get_se_golomb(&s->gb);
2997
        }
2998
    }else
2999
        h->disable_deblocking_filter_idc= 0;
3000
3001
#if 0 //FMO
3002
    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3003
        slice_group_change_cycle= get_bits(&s->gb, ?);
3004
#endif
3005
3006
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3007
        printf("mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
3008
               first_mb_in_slice, 
3009 d8085ea7 Michael Niedermayer
               av_get_pict_type_char(h->slice_type),
3010 0da71265 Michael Niedermayer
               pps_id, h->frame_num,
3011
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3012
               h->ref_count[0], h->ref_count[1],
3013
               s->qscale,
3014
               h->disable_deblocking_filter_idc
3015
               );
3016
    }
3017
3018
    return 0;
3019
}
3020
3021
/**
3022
 *
3023
 */
3024
static inline int get_level_prefix(GetBitContext *gb){
3025
    unsigned int buf;
3026
    int log;
3027
    
3028
    OPEN_READER(re, gb);
3029
    UPDATE_CACHE(re, gb);
3030
    buf=GET_CACHE(re, gb);
3031
    
3032
    log= 32 - av_log2(buf);
3033
#ifdef TRACE
3034
    print_bin(buf>>(32-log), log);
3035
    printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
3036
#endif
3037
3038
    LAST_SKIP_BITS(re, gb, log);
3039
    CLOSE_READER(re, gb);
3040
3041
    return log-1;
3042
}
3043
3044
/**
3045
 * decodes a residual block.
3046
 * @param n block index
3047
 * @param scantable scantable
3048
 * @param max_coeff number of coefficients in the block
3049
 * @return <0 if an error occured
3050
 */
3051
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){
3052
    MpegEncContext * const s = &h->s;
3053
    const uint16_t *qmul= dequant_coeff[qp];
3054
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3055
    int level[16], run[16];
3056
    int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
3057
3058
    //FIXME put trailing_onex into the context
3059
3060
    if(n == CHROMA_DC_BLOCK_INDEX){
3061
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3062
        total_coeff= coeff_token>>2;
3063
    }else{    
3064
        if(n == LUMA_DC_BLOCK_INDEX){
3065
            total_coeff= pred_non_zero_count(h, 0);
3066
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3067
            total_coeff= coeff_token>>2;
3068
        }else{
3069
            total_coeff= pred_non_zero_count(h, n);
3070
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3071
            total_coeff= coeff_token>>2;
3072
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
3073
        }
3074
    }
3075
3076
    //FIXME set last_non_zero?
3077
3078
    if(total_coeff==0)
3079
        return 0;
3080
        
3081
    trailing_ones= coeff_token&3;
3082 95c26348 Michael Niedermayer
    tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
3083 0da71265 Michael Niedermayer
    assert(total_coeff<=16);
3084
    
3085
    for(i=0; i<trailing_ones; i++){
3086
        level[i]= 1 - 2*get_bits1(gb);
3087
    }
3088
3089
    suffix_length= total_coeff > 10 && trailing_ones < 3;
3090
3091
    for(; i<total_coeff; i++){
3092
        const int prefix= get_level_prefix(gb);
3093
        int level_code, mask;
3094
3095
        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
3096
            if(suffix_length)
3097
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3098
            else
3099
                level_code= (prefix<<suffix_length); //part
3100
        }else if(prefix==14){
3101
            if(suffix_length)
3102
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3103
            else
3104
                level_code= prefix + get_bits(gb, 4); //part
3105
        }else if(prefix==15){
3106
            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
3107
            if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
3108
        }else{
3109
            fprintf(stderr, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
3110
            return -1;
3111
        }
3112
3113
        if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
3114
3115
        mask= -(level_code&1);
3116
        level[i]= (((2+level_code)>>1) ^ mask) - mask;
3117
3118
        if(suffix_length==0) suffix_length=1; //FIXME split first iteration
3119
3120
#if 1
3121
        if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3122