Statistics
| Branch: | Revision:

ffmpeg / libavcodec / h264.c @ 042e3bfe

History | View | Annotate | Download (194 KB)

1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 *
19
 */
20
 
21
/**
22
 * @file h264.c
23
 * H.264 / AVC / MPEG4 part10 codec.
24
 * @author Michael Niedermayer <michaelni@gmx.at>
25
 */
26

    
27
#include "common.h"
28
#include "dsputil.h"
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31
#include "h264data.h"
32
#include "golomb.h"
33

    
34
#include "cabac.h"
35

    
36
#undef NDEBUG
37
#include <assert.h>
38

    
39
/*
 * Rename two identifiers to deliberately awkward names; presumably this makes
 * any accidental use of them in this file stand out or fail to compile.
 */
#define interlaced_dct interlaced_dct_is_a_bad_name
#define mb_intra mb_intra_isnt_initalized_see_mb_type

/* Block indices reserved for the luma DC and chroma DC blocks. */
#define LUMA_DC_BLOCK_INDEX   25
#define CHROMA_DC_BLOCK_INDEX 26

/* Bit widths for the VLC tables declared below (presumably the first-level
 * lookup size passed when the tables are built; confirm at the init site). */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8
#define COEFF_TOKEN_VLC_BITS           8
#define TOTAL_ZEROS_VLC_BITS           9
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3
#define RUN_VLC_BITS                   3
#define RUN7_VLC_BITS                  6

/* Capacities of H264Context.sps_buffer[] and H264Context.pps_buffer[]. */
#define MAX_SPS_COUNT 32
#define MAX_PPS_COUNT 256

/* Capacity of H264Context.mmco[]. */
#define MAX_MMCO_COUNT 66

    
57
/**
58
 * Sequence parameter set
59
 */
60
typedef struct SPS{
61
    
62
    int profile_idc;
63
    int level_idc;
64
    int log2_max_frame_num;            ///< log2_max_frame_num_minus4 + 4
65
    int poc_type;                      ///< pic_order_cnt_type
66
    int log2_max_poc_lsb;              ///< log2_max_pic_order_cnt_lsb_minus4
67
    int delta_pic_order_always_zero_flag;
68
    int offset_for_non_ref_pic;
69
    int offset_for_top_to_bottom_field;
70
    int poc_cycle_length;              ///< num_ref_frames_in_pic_order_cnt_cycle
71
    int ref_frame_count;               ///< num_ref_frames
72
    int gaps_in_frame_num_allowed_flag;
73
    int mb_width;                      ///< frame_width_in_mbs_minus1 + 1
74
    int mb_height;                     ///< frame_height_in_mbs_minus1 + 1
75
    int frame_mbs_only_flag;
76
    int mb_aff;                        ///<mb_adaptive_frame_field_flag
77
    int direct_8x8_inference_flag;
78
    int crop;                   ///< frame_cropping_flag
79
    int crop_left;              ///< frame_cropping_rect_left_offset
80
    int crop_right;             ///< frame_cropping_rect_right_offset
81
    int crop_top;               ///< frame_cropping_rect_top_offset
82
    int crop_bottom;            ///< frame_cropping_rect_bottom_offset
83
    int vui_parameters_present_flag;
84
    AVRational sar;
85
    short offset_for_ref_frame[256]; //FIXME dyn aloc?
86
}SPS;
87

    
88
/**
 * Picture parameter set.
 * Each field holds one parsed picture-level syntax element; the trailing
 * comments give the bitstream element name the field is derived from.
 */
typedef struct PPS{
    int sps_id;
    int cabac;                  ///< entropy_coding_mode_flag
    int pic_order_present;      ///< pic_order_present_flag
    int slice_group_count;      ///< num_slice_groups_minus1 + 1
    int mb_slice_group_map_type;
    int ref_count[2];           ///< num_ref_idx_l0/1_active_minus1 + 1
    int weighted_pred;          ///< weighted_pred_flag
    int weighted_bipred_idc;
    int init_qp;                ///< pic_init_qp_minus26 + 26
    int init_qs;                ///< pic_init_qs_minus26 + 26
    int chroma_qp_index_offset;
    int deblocking_filter_parameters_present; ///< deblocking_filter_parameters_present_flag
    int constrained_intra_pred; ///< constrained_intra_pred_flag
    int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
}PPS;
107

    
108
/**
 * Memory management control operation opcode.
 * The numeric values are significant (MMCO_END must be 0); they presumably
 * mirror the memory_management_control_operation codes of the bitstream.
 */
typedef enum MMCOOpcode{
    MMCO_END          = 0,
    MMCO_SHORT2UNUSED = 1,
    MMCO_LONG2UNUSED  = 2,
    MMCO_SHORT2LONG   = 3,
    MMCO_SET_MAX_LONG = 4,
    MMCO_RESET        = 5,
    MMCO_LONG         = 6,
} MMCOOpcode;
120

    
121
/**
122
 * Memory management control operation.
123
 */
124
typedef struct MMCO{
125
    MMCOOpcode opcode;
126
    int short_frame_num;
127
    int long_index;
128
} MMCO;
129

    
130
/**
131
 * H264Context
132
 */
133
typedef struct H264Context{
134
    MpegEncContext s;
135
    int nal_ref_idc;        
136
    int nal_unit_type;
137
#define NAL_SLICE                1
138
#define NAL_DPA                        2
139
#define NAL_DPB                        3
140
#define NAL_DPC                        4
141
#define NAL_IDR_SLICE                5
142
#define NAL_SEI                        6
143
#define NAL_SPS                        7
144
#define NAL_PPS                        8
145
#define NAL_PICTURE_DELIMITER        9
146
#define NAL_FILTER_DATA                10
147
    uint8_t *rbsp_buffer;
148
    int rbsp_buffer_size;
149

    
150
    int chroma_qp; //QPc
151

    
152
    int prev_mb_skiped; //FIXME remove (IMHO not used)
153

    
154
    //prediction stuff
155
    int chroma_pred_mode;
156
    int intra16x16_pred_mode;
157
    
158
    int8_t intra4x4_pred_mode_cache[5*8];
159
    int8_t (*intra4x4_pred_mode)[8];
160
    void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
161
    void (*pred8x8  [4+3])(uint8_t *src, int stride);
162
    void (*pred16x16[4+3])(uint8_t *src, int stride);
163
    unsigned int topleft_samples_available;
164
    unsigned int top_samples_available;
165
    unsigned int topright_samples_available;
166
    unsigned int left_samples_available;
167
    uint8_t (*top_border)[16+2*8];
168
    uint8_t left_border[17+2*9];
169

    
170
    /**
171
     * non zero coeff count cache.
172
     * is 64 if not available.
173
     */
174
    uint8_t non_zero_count_cache[6*8];
175
    uint8_t (*non_zero_count)[16];
176

    
177
    /**
178
     * Motion vector cache.
179
     */
180
    int16_t mv_cache[2][5*8][2];
181
    int8_t ref_cache[2][5*8];
182
#define LIST_NOT_USED -1 //FIXME rename?
183
#define PART_NOT_AVAILABLE -2
184
    
185
    /**
186
     * is 1 if the specific list MV&references are set to 0,0,-2.
187
     */
188
    int mv_cache_clean[2];
189

    
190
    int block_offset[16+8];
191
    int chroma_subblock_offset[16]; //FIXME remove
192
    
193
    uint16_t *mb2b_xy; //FIXME are these 4 a good idea?
194
    uint16_t *mb2b8_xy;
195
    int b_stride;
196
    int b8_stride;
197

    
198
    int halfpel_flag;
199
    int thirdpel_flag;
200

    
201
    int unknown_svq3_flag;
202
    int next_slice_index;
203

    
204
    SPS sps_buffer[MAX_SPS_COUNT];
205
    SPS sps; ///< current sps
206
    
207
    PPS pps_buffer[MAX_PPS_COUNT];
208
    /**
209
     * current pps
210
     */
211
    PPS pps; //FIXME move tp Picture perhaps? (->no) do we need that?
212

    
213
    int slice_num;
214
    uint8_t *slice_table_base;
215
    uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
216
    int slice_type;
217
    int slice_type_fixed;
218
    
219
    //interlacing specific flags
220
    int mb_field_decoding_flag;
221
    
222
    int sub_mb_type[4];
223
    
224
    //POC stuff
225
    int poc_lsb;
226
    int poc_msb;
227
    int delta_poc_bottom;
228
    int delta_poc[2];
229
    int frame_num;
230
    int prev_poc_msb;             ///< poc_msb of the last reference pic for POC type 0
231
    int prev_poc_lsb;             ///< poc_lsb of the last reference pic for POC type 0
232
    int frame_num_offset;         ///< for POC type 2
233
    int prev_frame_num_offset;    ///< for POC type 2
234
    int prev_frame_num;           ///< frame_num of the last pic for POC type 1/2
235

    
236
    /**
237
     * frame_num for frames or 2*frame_num for field pics.
238
     */
239
    int curr_pic_num;
240
    
241
    /**
242
     * max_frame_num or 2*max_frame_num for field pics.
243
     */
244
    int max_pic_num;
245

    
246
    //Weighted pred stuff
247
    int luma_log2_weight_denom;
248
    int chroma_log2_weight_denom;
249
    int luma_weight[2][16];
250
    int luma_offset[2][16];
251
    int chroma_weight[2][16][2];
252
    int chroma_offset[2][16][2];
253
   
254
    //deblock
255
    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0 
256
    int slice_alpha_c0_offset;
257
    int slice_beta_offset;
258
     
259
    int redundant_pic_count;
260
    
261
    int direct_spatial_mv_pred;
262

    
263
    /**
264
     * num_ref_idx_l0/1_active_minus1 + 1
265
     */
266
    int ref_count[2];// FIXME split for AFF
267
    Picture *short_ref[16];
268
    Picture *long_ref[16];
269
    Picture default_ref_list[2][32];
270
    Picture ref_list[2][32]; //FIXME size?
271
    Picture field_ref_list[2][32]; //FIXME size?
272
    
273
    /**
274
     * memory management control operations buffer.
275
     */
276
    MMCO mmco[MAX_MMCO_COUNT];
277
    int mmco_index;
278
    
279
    int long_ref_count;  ///< number of actual long term references
280
    int short_ref_count; ///< number of actual short term references
281
    
282
    //data partitioning
283
    GetBitContext intra_gb;
284
    GetBitContext inter_gb;
285
    GetBitContext *intra_gb_ptr;
286
    GetBitContext *inter_gb_ptr;
287
    
288
    DCTELEM mb[16*24] __align8;
289

    
290
    /**
291
     * Cabac
292
     */
293
    CABACContext cabac;
294
    uint8_t      cabac_state[399];
295
    int          cabac_init_idc;
296

    
297
    /* 0x100 -> non null luma_dc, 0x80/0x40 -> non null chroma_dc (cb/cr), 0x?0 -> chroma_cbp(0,1,2), 0x0? luma_cbp */
298
    uint16_t     *cbp_table;
299
    uint8_t     *chroma_pred_mode_table;
300
    int         last_qscale_diff;
301

    
302
}H264Context;
303

    
304
/* File-scope VLC tables. Judging by their names they decode the coefficient
 * token, total_zeros and run_before syntax elements; confirm at the init site. */
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;

static VLC total_zeros_vlc[15];
static VLC chroma_dc_total_zeros_vlc[3];

static VLC run_vlc[6];
static VLC run7_vlc;
312

    
313
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
314
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
315
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr);
316

    
317
/**
 * Packs two 16 bit values into one 32 bit word.
 * The halves are ordered by endianness: on little endian builds a lands in
 * the low 16 bits and b in the high 16 bits; with WORDS_BIGENDIAN defined the
 * roles are swapped. Only the low 16 bits of each argument are kept.
 * @param a first value (may be negative)
 * @param b second value (may be negative)
 * @return the packed 32 bit word
 */
static inline uint32_t pack16to32(int a, int b){
    /* Shift as unsigned: left-shifting a negative int is undefined behaviour. */
#ifdef WORDS_BIGENDIAN
   return ((uint32_t)b&0xFFFF) + ((uint32_t)a<<16);
#else
   return ((uint32_t)a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
324

    
325
/**
 * fill a rectangle.
 * Every element of a w x h rectangle starting at vp is set to val. Only the
 * (w*size, h) combinations listed in the if-chain below are supported; any
 * other combination hits assert(0).
 * @param vp pointer to the top left element; it is written through
 *           uint16_t/uint32_t/uint64_t pointers, so it is presumably
 *           expected to be suitably aligned for the chosen width
 * @param w width of the rectangle in elements, should be a constant
 * @param h height of the rectangle in rows, should be a constant
 * @param stride distance between rows, in elements
 * @param val the value to store; for size==1 its low byte is replicated
 * @param size the size of val (1 or 4), should be a constant
 */
static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
    uint8_t *p= (uint8_t*)vp;
    assert(size==1 || size==4);

    /* from here on w and stride are in bytes */
    w      *= size;
    stride *= size;

//FIXME check what gcc generates for 64 bit on x86 and possible write a 32 bit ver of it
    if(w==2 && h==2){
        *(uint16_t*)(p + 0)=
        *(uint16_t*)(p + stride)= size==4 ? val : val*0x0101;
    }else if(w==2 && h==4){
        *(uint16_t*)(p + 0*stride)=
        *(uint16_t*)(p + 1*stride)=
        *(uint16_t*)(p + 2*stride)=
        *(uint16_t*)(p + 3*stride)= size==4 ? val : val*0x0101;
    }else if(w==4 && h==1){
        *(uint32_t*)(p + 0*stride)= size==4 ? val : val*0x01010101;
    }else if(w==4 && h==2){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)= size==4 ? val : val*0x01010101;
    }else if(w==4 && h==4){
        *(uint32_t*)(p + 0*stride)=
        *(uint32_t*)(p + 1*stride)=
        *(uint32_t*)(p + 2*stride)=
        *(uint32_t*)(p + 3*stride)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==1){
        *(uint32_t*)(p + 0)=
        *(uint32_t*)(p + 4)= size==4 ? val : val*0x01010101;
    }else if(w==8 && h==2){
        *(uint32_t*)(p + 0 + 0*stride)=
        *(uint32_t*)(p + 4 + 0*stride)=
        *(uint32_t*)(p + 0 + 1*stride)=
        *(uint32_t*)(p + 4 + 1*stride)=  size==4 ? val : val*0x01010101;
    }else if(w==8 && h==4){
        /* 0x0100000001 == 2^32 + 1: duplicates the 32 bit val into both halves */
        *(uint64_t*)(p + 0*stride)=
        *(uint64_t*)(p + 1*stride)=
        *(uint64_t*)(p + 2*stride)=
        *(uint64_t*)(p + 3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==2){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else if(w==16 && h==4){
        *(uint64_t*)(p + 0+0*stride)=
        *(uint64_t*)(p + 8+0*stride)=
        *(uint64_t*)(p + 0+1*stride)=
        *(uint64_t*)(p + 8+1*stride)=
        *(uint64_t*)(p + 0+2*stride)=
        *(uint64_t*)(p + 8+2*stride)=
        *(uint64_t*)(p + 0+3*stride)=
        *(uint64_t*)(p + 8+3*stride)= size==4 ? val*0x0100000001ULL : val*0x0101010101010101ULL;
    }else
        assert(0);
}
387

    
388
/**
 * Loads the neighbour caches of the current macroblock.
 * A neighbour (top-left, top, top-right, left) is only considered if
 * slice_table says it belongs to the current slice; otherwise its type is
 * treated as 0. From the neighbours this fills:
 *  - for intra macroblocks the *_samples_available bitmasks and, for
 *    intra4x4, the top row / left column of intra4x4_pred_mode_cache
 *  - the top row / left column of non_zero_count_cache (64 = not available)
 *  - for inter macroblocks the border entries of mv_cache and ref_cache
 * Macroblock adaptive frame/field (sps.mb_aff) is not implemented; that
 * branch only assigns placeholder neighbour positions.
 * @param h decoder context, h->s.mb_x / mb_y select the macroblock
 * @param mb_type type of the current macroblock
 */
static inline void fill_caches(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    int left_block[4];
    int i;

    //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it

    if(h->sps.mb_aff){
    //FIXME
        topleft_xy = 0; /* avoid warning */
        top_xy = 0; /* avoid warning */
        topright_xy = 0; /* avoid warning */
        /* placeholders as well: these are read below, so they must not stay uninitialized */
        left_xy[0]   = 0;
        left_xy[1]   = 0;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
    }else{
        topleft_xy = mb_xy-1 - s->mb_stride;
        top_xy     = mb_xy   - s->mb_stride;
        topright_xy= mb_xy+1 - s->mb_stride;
        left_xy[0]   = mb_xy-1;
        left_xy[1]   = mb_xy-1;
        left_block[0]= 0;
        left_block[1]= 1;
        left_block[2]= 2;
        left_block[3]= 3;
    }

    /* a neighbour outside the current slice gets type 0 */
    topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
    top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
    topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
    left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
    left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    if(IS_INTRA(mb_type)){
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        /* a non-intra neighbour is unusable if it is missing or constrained_intra_pred is set */
        if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        for(i=0; i<2; i++){
            if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
            h->topleft_samples_available&= 0x7FFF;

        if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                int pred;
                if(IS_INTRA16x16(top_type) || (IS_INTER(top_type) && !h->pps.constrained_intra_pred))
                    pred= 2;
                else{
                    pred= -1;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(IS_INTRA16x16(left_type[i]) || (IS_INTER(left_type[i]) && !h->pps.constrained_intra_pred))
                        pred= 2;
                    else{
                        pred= -1;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][0];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][1];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][2];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][7];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][10];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
    }else{
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= 64;
    }

    if(left_type[0]){
        h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][6];
        h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][5];
        h->non_zero_count_cache[0+8*1]= h->non_zero_count[left_xy[0]][9]; //FIXME left_block
        h->non_zero_count_cache[0+8*4]= h->non_zero_count[left_xy[0]][12];
    }else{
        h->non_zero_count_cache[3+8*1]=
        h->non_zero_count_cache[3+8*2]=
        h->non_zero_count_cache[0+8*1]=
        h->non_zero_count_cache[0+8*4]= 64;
    }

    if(left_type[1]){
        h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[1]][4];
        h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[1]][3];
        h->non_zero_count_cache[0+8*2]= h->non_zero_count[left_xy[1]][8];
        h->non_zero_count_cache[0+8*5]= h->non_zero_count[left_xy[1]][11];
    }else{
        h->non_zero_count_cache[3+8*3]=
        h->non_zero_count_cache[3+8*4]=
        h->non_zero_count_cache[0+8*2]=
        h->non_zero_count_cache[0+8*5]= 64;
    }

#if 1
    if(IS_INTER(mb_type)){
        int list;
        for(list=0; list<2; list++){
            if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue; //FIXME direct mode ...
            }
            h->mv_cache_clean[list]= 0;

            /* For each neighbour: copy its MV/ref if it is inter coded, else store a
               zero MV and LIST_NOT_USED (neighbour exists) or PART_NOT_AVAILABLE (it does not). */
            if(IS_INTER(topleft_type)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(IS_INTER(top_type)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                /* sets the 4 top ref_cache bytes at once by replicating the marker byte */
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            if(IS_INTER(topright_type)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            //FIXME unify cleanup or sth
            if(IS_INTER(left_type[0])){
                const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(IS_INTER(left_type[1])){
                const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
                /* this half belongs to left neighbour 1, so test left_type[1] */
                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[1] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewher else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewher else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
        }
//FIXME

    }
#endif
}
637

    
638
/**
 * Saves the edge intra4x4 prediction modes of the current macroblock.
 * Entries 0..3 take cache column 7 (rows 1..4), entries 4..6 take cache
 * row 4 (columns 4..6); fill_caches() reads them back for the macroblocks
 * to the right and below.
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
    h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
    h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
    h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
    h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
    h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
    h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
}
650

    
651
/**
652
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
653
 */
654
static inline int check_intra4x4_pred_mode(H264Context *h){
655
    MpegEncContext * const s = &h->s;
656
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
657
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
658
    int i;
659
    
660
    if(!(h->top_samples_available&0x8000)){
661
        for(i=0; i<4; i++){
662
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
663
            if(status<0){
664
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
665
                return -1;
666
            } else if(status){
667
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
668
            }
669
        }
670
    }
671
    
672
    if(!(h->left_samples_available&0x8000)){
673
        for(i=0; i<4; i++){
674
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
675
            if(status<0){
676
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
677
                return -1;
678
            } else if(status){
679
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
680
            }
681
        }
682
    }
683

    
684
    return 0;
685
} //FIXME cleanup like next
686

    
687
/**
688
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
689
 */
690
static inline int check_intra_pred_mode(H264Context *h, int mode){
691
    MpegEncContext * const s = &h->s;
692
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
693
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
694
    
695
    if(!(h->top_samples_available&0x8000)){
696
        mode= top[ mode ];
697
        if(mode<0){
698
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
699
            return -1;
700
        }
701
    }
702
    
703
    if(!(h->left_samples_available&0x8000)){
704
        mode= left[ mode ];
705
        if(mode<0){
706
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
707
            return -1;
708
        } 
709
    }
710

    
711
    return mode;
712
}
713

    
714
/**
715
 * gets the predicted intra4x4 prediction mode.
716
 */
717
static inline int pred_intra_mode(H264Context *h, int n){
718
    const int index8= scan8[n];
719
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
720
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
721
    const int min= FFMIN(left, top);
722

    
723
    tprintf("mode:%d %d min:%d\n", left ,top, min);
724

    
725
    if(min<0) return DC_PRED;
726
    else      return min;
727
}
728

    
729
/**
 * Saves the edge entries of non_zero_count_cache for the current macroblock.
 * Entries 0..3 take cache row 4 (columns 4..7) and entries 4..6 take cache
 * column 7 (rows 3..1). Entries 7..12 take positions from cache columns 1..2
 * (presumably the chroma blocks). fill_caches() reads these values back for
 * the neighbouring macroblocks.
 */
static inline void write_back_non_zero_count(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;

    h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[4+8*4];
    h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[5+8*4];
    h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[6+8*4];
    h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
    h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[7+8*3];
    h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[7+8*2];
    h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[7+8*1];

    h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[1+8*2];
    h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
    h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[2+8*1];

    h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[1+8*5];
    h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
    h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[2+8*4];
}
749

    
750
/**
751
 * gets the predicted number of non zero coefficients.
752
 * @param n block index
753
 */
754
static inline int pred_non_zero_count(H264Context *h, int n){
755
    const int index8= scan8[n];
756
    const int left= h->non_zero_count_cache[index8 - 1];
757
    const int top = h->non_zero_count_cache[index8 - 8];
758
    int i= left + top;
759
    
760
    if(i<64) i= (i+1)>>1;
761

    
762
    tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
763

    
764
    return i&31;
765
}
766

    
767
/**
 * Fetches the diagonal neighbour used for motion vector prediction.
 * The entry at i - 8 + part_width (top right) is used when its reference is
 * not PART_NOT_AVAILABLE, otherwise the entry at i - 8 - 1 (top left).
 * @param C receives a pointer to the selected motion vector in mv_cache
 * @param i position in the ref_cache/mv_cache arrays
 * @return the ref_cache entry of the selected neighbour
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    int pos= i - 8 + part_width;

    if(h->ref_cache[list][pos] == PART_NOT_AVAILABLE){
        tprintf("topright MV not available\n");

        pos= i - 8 - 1;
    }

    *C= h->mv_cache[list][pos];
    return h->ref_cache[list][pos];
}
780

    
781
/**
782
 * gets the predicted MV.
783
 * @param n the block index
784
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
785
 * @param mx the x component of the predicted motion vector
786
 * @param my the y component of the predicted motion vector
787
 */
788
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
789
    const int index8= scan8[n];
790
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
791
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
792
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
793
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
794
    const int16_t * C;
795
    int diagonal_ref, match_count;
796

    
797
    assert(part_width==1 || part_width==2 || part_width==4);
798

    
799
/* mv_cache
800
  B . . A T T T T 
801
  U . . L . . , .
802
  U . . L . . . .
803
  U . . L . . , .
804
  . . . L . . . .
805
*/
806

    
807
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
808
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
809
    if(match_count > 1){ //most common
810
        *mx= mid_pred(A[0], B[0], C[0]);
811
        *my= mid_pred(A[1], B[1], C[1]);
812
    }else if(match_count==1){
813
        if(left_ref==ref){
814
            *mx= A[0];
815
            *my= A[1];        
816
        }else if(top_ref==ref){
817
            *mx= B[0];
818
            *my= B[1];        
819
        }else{
820
            *mx= C[0];
821
            *my= C[1];        
822
        }
823
    }else{
824
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
825
            *mx= A[0];
826
            *my= A[1];        
827
        }else{
828
            *mx= mid_pred(A[0], B[0], C[0]);
829
            *my= mid_pred(A[1], B[1], C[1]);
830
        }
831
    }
832
        
833
    tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
834
}
835

    
836
/**
837
 * gets the directionally predicted 16x8 MV.
838
 * @param n the block index
839
 * @param mx the x component of the predicted motion vector
840
 * @param my the y component of the predicted motion vector
841
 */
842
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
843
    if(n==0){
844
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
845
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
846

    
847
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
848
        
849
        if(top_ref == ref){
850
            *mx= B[0];
851
            *my= B[1];
852
            return;
853
        }
854
    }else{
855
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
856
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
857
        
858
        tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
859

    
860
        if(left_ref == ref){
861
            *mx= A[0];
862
            *my= A[1];
863
            return;
864
        }
865
    }
866

    
867
    //RARE
868
    pred_motion(h, n, 4, list, ref, mx, my);
869
}
870

    
871
/**
872
 * gets the directionally predicted 8x16 MV.
873
 * @param n the block index
874
 * @param mx the x component of the predicted motion vector
875
 * @param my the y component of the predicted motion vector
876
 */
877
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
878
    if(n==0){
879
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
880
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
881
        
882
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
883

    
884
        if(left_ref == ref){
885
            *mx= A[0];
886
            *my= A[1];
887
            return;
888
        }
889
    }else{
890
        const int16_t * C;
891
        int diagonal_ref;
892

    
893
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
894
        
895
        tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
896

    
897
        if(diagonal_ref == ref){ 
898
            *mx= C[0];
899
            *my= C[1];
900
            return;
901
        }
902
    }
903

    
904
    //RARE
905
    pred_motion(h, n, 2, list, ref, mx, my);
906
}
907

    
908
/**
 * Gets the motion vector used for a skipped macroblock (list 0, ref 0).
 * The vector is zero if the top or left neighbour is PART_NOT_AVAILABLE, or
 * if either of them uses reference 0 with an all-zero motion vector;
 * otherwise it is the pred_motion() prediction for block 0.
 * @param mx the x component of the resulting motion vector
 * @param my the y component of the resulting motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_pos = scan8[0] - 8;
    const int left_pos= scan8[0] - 1;
    const int top_ref = h->ref_cache[0][ top_pos ];
    const int left_ref= h->ref_cache[0][ left_pos ];
    int force_zero;

    tprintf("pred_pskip: (%d) (%d) at %2d %2d", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    /* the 32 bit load tests both components of a vector at once */
    force_zero=    top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
                || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ top_pos ] == 0)
                || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ left_pos ] == 0);

    if(force_zero)
        *mx = *my = 0;
    else
        pred_motion(h, 0, 4, 0, 0, mx, my);
}
926

    
927
/**
 * Writes the motion vectors and reference indices of the current macroblock
 * from mv_cache/ref_cache back into current_picture.motion_val/ref_index.
 * For a list the macroblock does not use (and which is not 8x8 partitioned)
 * the vectors are zeroed and the reference indices set to LIST_NOT_USED.
 * @param mb_type macroblock type, tested with IS_8X8() and USES_LIST()
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    int list;

    for(list=0; list<2; list++){
        int y;
        if((!IS_8X8(mb_type)) && !USES_LIST(mb_type, list)){
            //FIXME skip or never read if mb_type doesnt use it
            for(y=0; y<4; y++){
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]=
                *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0;
            }
            /* The unused marker belongs into ref_index (indexed by b8_xy, like
             * the regular path below), not into motion_val: writing it there
             * clobbered unrelated vectors and left ref_index stale. */
            for(y=0; y<2; y++){
                s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= LIST_NOT_USED;
                s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED;
            }
            continue; //FIXME direct mode ...
        }

        /* each 64 bit copy moves the cache entries at +0/+1 resp. +2/+3 of a row */
        for(y=0; y<4; y++){
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
            *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
        }
        for(y=0; y<2; y++){
            s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y];
            s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
        }
    }
}
958

    
959
/**
960
 * Decodes a network abstraction layer unit.
961
 * @param consumed is the number of bytes used as input
962
 * @param length is the length of the array
963
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp ttailing?
964
 * @returns decoded bytes, might be src+1 if no escapes 
965
 */
966
static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
967
    int i, si, di;
968
    uint8_t *dst;
969

    
970
//    src[0]&0x80;                //forbidden bit
971
    h->nal_ref_idc= src[0]>>5;
972
    h->nal_unit_type= src[0]&0x1F;
973

    
974
    src++; length--;
975
#if 0    
976
    for(i=0; i<length; i++)
977
        printf("%2X ", src[i]);
978
#endif
979
    for(i=0; i+1<length; i+=2){
980
        if(src[i]) continue;
981
        if(i>0 && src[i-1]==0) i--;
982
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
983
            if(src[i+2]!=3){
984
                /* startcode, so we must be past the end */
985
                length=i;
986
            }
987
            break;
988
        }
989
    }
990

    
991
    if(i>=length-1){ //no escaped 0
992
        *dst_length= length;
993
        *consumed= length+1; //+1 for the header
994
        return src; 
995
    }
996

    
997
    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
998
    dst= h->rbsp_buffer;
999

    
1000
//printf("deoding esc\n");
1001
    si=di=0;
1002
    while(si<length){ 
1003
        //remove escapes (very rare 1:2^22)
1004
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1005
            if(src[si+2]==3){ //escape
1006
                dst[di++]= 0;
1007
                dst[di++]= 0;
1008
                si+=3;
1009
            }else //next start code
1010
                break;
1011
        }
1012

    
1013
        dst[di++]= src[si++];
1014
    }
1015

    
1016
    *dst_length= di;
1017
    *consumed= si + 1;//+1 for the header
1018
//FIXME store exact number of bits in the getbitcontext (its needed for decoding)
1019
    return dst;
1020
}
1021

    
1022
#if 0
/**
 * Writes the NAL header byte followed by the payload, inserting an escape
 * byte (03) before every byte <= 3 that follows two zero bytes.
 * @param src the data which should be escaped
 * @param dst the target buffer, dst+1 == src is allowed as a special case
 * @param length the length of the src data
 * @param dst_length the length of the dst array
 * @returns length of escaped data in bytes or -1 if an error occured
 */
static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
    int i, escape_count, zero_run, di;
    uint8_t *temp;

    assert(length>=0);
    assert(dst_length>0);

    dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;

    if(length==0) return 1;

    /* Count the escapes. The zero run restarts after every inserted escape
     * byte, so 00 00 00 00 01 needs two of them. */
    escape_count= 0;
    zero_run= 0;
    for(i=0; i<length; i++){
        if(zero_run>=2 && src[i]<=3){
            escape_count++;
            zero_run= 0;
        }
        zero_run= src[i] ? 0 : zero_run+1;
    }

    /* checked for both paths, the plain copy must not overflow dst either */
    if(length + escape_count + 1> dst_length)
        return -1;

    if(escape_count==0){
        if(dst+1 != src)
            memcpy(dst+1, src, length);
        return length + 1;
    }

    //this should be damn rare (hopefully)

    h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
    temp= h->rbsp_buffer;
    if(!temp){
        h->rbsp_buffer_size= 0;
        return -1;
    }

    /* escape into the temporary buffer first, src may overlap dst */
    zero_run= 0;
    di= 0;
    for(i=0; i<length; i++){
        if(zero_run>=2 && src[i]<=3){
            temp[di++]= 3;
            zero_run= 0;
        }
        temp[di++]= src[i];
        zero_run= src[i] ? 0 : zero_run+1;
    }

    assert(di == length+escape_count);

    memcpy(dst+1, temp, di);

    return di + 1;
}
1085

1086
/**
1087
 * write 1,10,100,1000,... for alignment, yes its exactly inverse to mpeg4
1088
 */
1089
static void encode_rbsp_trailing(PutBitContext *pb){
1090
    int length;
1091
    put_bits(pb, 1, 1);
1092
    length= (-put_bits_count(pb))&7;
1093
    if(length) put_bits(pb, length, 0);
1094
}
1095
#endif
1096

    
1097
/**
 * identifies the exact end of the bitstream
 * Looks for the lowest set bit of the byte at src.
 * @return the length of the trailing (1 if bit 0 is set ... 8 if only bit 7
 *         is set), or 0 if damaged (no bit set)
 */
static int decode_rbsp_trailing(uint8_t *src){
    const int v= *src;
    int bit;

    tprintf("rbsp trailing %X\n", v);

    for(bit=0; bit<8; bit++){
        if((v>>bit)&1)
            return bit + 1;
    }
    return 0;
}
1113

    
1114
/**
1115
 * idct tranforms the 16 dc values and dequantize them.
1116
 * @param qp quantization parameter
1117
 */
1118
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
1119
    const int qmul= dequant_coeff[qp][0];
1120
#define stride 16
1121
    int i;
1122
    int temp[16]; //FIXME check if this is a good idea
1123
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1124
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1125

    
1126
//memset(block, 64, 2*256);
1127
//return;
1128
    for(i=0; i<4; i++){
1129
        const int offset= y_offset[i];
1130
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1131
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1132
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1133
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1134

    
1135
        temp[4*i+0]= z0+z3;
1136
        temp[4*i+1]= z1+z2;
1137
        temp[4*i+2]= z1-z2;
1138
        temp[4*i+3]= z0-z3;
1139
    }
1140

    
1141
    for(i=0; i<4; i++){
1142
        const int offset= x_offset[i];
1143
        const int z0= temp[4*0+i] + temp[4*2+i];
1144
        const int z1= temp[4*0+i] - temp[4*2+i];
1145
        const int z2= temp[4*1+i] - temp[4*3+i];
1146
        const int z3= temp[4*1+i] + temp[4*3+i];
1147

    
1148
        block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
1149
        block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
1150
        block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
1151
        block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
1152
    }
1153
}
1154

    
1155
#if 0
/**
 * dct tranforms the 16 dc values.
 * Relies on the stride macro defined in h264_luma_dc_dequant_idct_c().
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    int i;
    int temp[16]; //FIXME check if this is a good idea
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};

    /* first pass: butterflies into temp[] */
    for(i=0; i<4; i++){
        const DCTELEM * const in= block + y_offset[i];
        int * const out= temp + 4*i;
        const int sum04 = in[stride*0] + in[stride*4];
        const int diff04= in[stride*0] - in[stride*4];
        const int diff15= in[stride*1] - in[stride*5];
        const int sum15 = in[stride*1] + in[stride*5];

        out[0]= sum04  + sum15;
        out[1]= diff04 + diff15;
        out[2]= diff04 - diff15;
        out[3]= sum04  - sum15;
    }

    /* second pass over temp[], halved */
    for(i=0; i<4; i++){
        DCTELEM * const out= block + x_offset[i];
        const int sum02 = temp[4*0+i] + temp[4*2+i];
        const int diff02= temp[4*0+i] - temp[4*2+i];
        const int diff13= temp[4*1+i] - temp[4*3+i];
        const int sum13 = temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02  + sum13 )>>1;
        out[stride*2 ]= (diff02 + diff13)>>1;
        out[stride*8 ]= (diff02 - diff13)>>1;
        out[stride*10]= (sum02  - sum13 )>>1;
    }
}
#endif
1194

    
1195
#undef xStride
1196
#undef stride
1197

    
1198
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
1199
    const int qmul= dequant_coeff[qp][0];
1200
    const int stride= 16*2;
1201
    const int xStride= 16;
1202
    int a,b,c,d,e;
1203

    
1204
    a= block[stride*0 + xStride*0];
1205
    b= block[stride*0 + xStride*1];
1206
    c= block[stride*1 + xStride*0];
1207
    d= block[stride*1 + xStride*1];
1208

    
1209
    e= a-b;
1210
    a= a+b;
1211
    b= c-d;
1212
    c= c+d;
1213

    
1214
    block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
1215
    block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
1216
    block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
1217
    block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
1218
}
1219

    
1220
#if 0
/**
 * Transforms the 2x2 chroma dc values in place (no scaling).
 * The four values are located at block[0], block[16], block[32], block[48].
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int stride= 16*2;
    const int xStride= 16;
    const int v00= block[stride*0 + xStride*0];
    const int v01= block[stride*0 + xStride*1];
    const int v10= block[stride*1 + xStride*0];
    const int v11= block[stride*1 + xStride*1];
    const int top_sum = v00 + v01;
    const int top_diff= v00 - v01;
    const int bot_sum = v10 + v11;
    const int bot_diff= v10 - v11;

    block[stride*0 + xStride*0]= (top_sum  + bot_sum );
    block[stride*0 + xStride*1]= (top_diff + bot_diff);
    block[stride*1 + xStride*0]= (top_sum  - bot_sum );
    block[stride*1 + xStride*1]= (top_diff - bot_diff);
}
#endif
1242

    
1243
/**
1244
 * gets the chroma qp.
1245
 */
1246
static inline int get_chroma_qp(H264Context *h, int qscale){
1247
    
1248
    return chroma_qp[clip(qscale + h->pps.chroma_qp_index_offset, 0, 51)];
1249
}
1250

    
1251

    
1252
/**
1253
 *
1254
 */
1255
static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){
1256
    int i;
1257
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
1258

    
1259
    block[0] += 32;
1260

    
1261
    for(i=0; i<4; i++){
1262
        const int z0=  block[0 + 4*i]     +  block[2 + 4*i];
1263
        const int z1=  block[0 + 4*i]     -  block[2 + 4*i];
1264
        const int z2= (block[1 + 4*i]>>1) -  block[3 + 4*i];
1265
        const int z3=  block[1 + 4*i]     + (block[3 + 4*i]>>1);
1266

    
1267
        block[0 + 4*i]= z0 + z3;
1268
        block[1 + 4*i]= z1 + z2;
1269
        block[2 + 4*i]= z1 - z2;
1270
        block[3 + 4*i]= z0 - z3;
1271
    }
1272

    
1273
    for(i=0; i<4; i++){
1274
        const int z0=  block[i + 4*0]     +  block[i + 4*2];
1275
        const int z1=  block[i + 4*0]     -  block[i + 4*2];
1276
        const int z2= (block[i + 4*1]>>1) -  block[i + 4*3];
1277
        const int z3=  block[i + 4*1]     + (block[i + 4*3]>>1);
1278

    
1279
        dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ];
1280
        dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ];
1281
        dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ];
1282
        dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ];
1283
    }
1284
}
1285

    
1286
#if 0
/**
 * Forward transforms the 4x4 difference src1 - src2 into block.
 * @param block receives the 16 coefficients, 4 per row
 * @param stride distance between two rows of src1 and src2
 */
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
    int i;
    //FIXME try int temp instead of block

    /* first pass: one row of pixel differences per iteration */
    for(i=0; i<4; i++){
        const uint8_t * const a= src1 + i*stride;
        const uint8_t * const b= src2 + i*stride;
        DCTELEM * const row= block + 4*i;
        const int d0= a[0] - b[0];
        const int d1= a[1] - b[1];
        const int d2= a[2] - b[2];
        const int d3= a[3] - b[3];
        const int outer_sum = d0 + d3;
        const int outer_diff= d0 - d3;
        const int inner_sum = d1 + d2;
        const int inner_diff= d1 - d2;

        row[0]=   outer_sum  +   inner_sum;
        row[1]= 2*outer_diff +   inner_diff;
        row[2]=   outer_sum  -   inner_sum;
        row[3]=   outer_diff - 2*inner_diff;
    }

    /* second pass: coefficients i, i+4, i+8, i+12, in place */
    for(i=0; i<4; i++){
        DCTELEM * const col= block + i;
        const int outer_sum = col[0*4] + col[3*4];
        const int outer_diff= col[0*4] - col[3*4];
        const int inner_sum = col[1*4] + col[2*4];
        const int inner_diff= col[1*4] - col[2*4];

        col[0*4]=   outer_sum  +   inner_sum;
        col[1*4]= 2*outer_diff +   inner_diff;
        col[2*4]=   outer_sum  -   inner_sum;
        col[3*4]=   outer_diff - 2*inner_diff;
    }
}
#endif
1320

    
1321
//FIXME need to check that this doesnt overflow signed 32 bit for low qp, iam not sure, its very close
1322
//FIXME check that gcc inlines this (and optimizes intra & seperate_dc stuff away)
1323
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int seperate_dc){
1324
    int i;
1325
    const int * const quant_table= quant_coeff[qscale];
1326
    const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1327
    const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1328
    const unsigned int threshold2= (threshold1<<1);
1329
    int last_non_zero;
1330

    
1331
    if(seperate_dc){
1332
        if(qscale<=18){
1333
            //avoid overflows
1334
            const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1335
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1336
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1337

    
1338
            int level= block[0]*quant_coeff[qscale+18][0];
1339
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1340
                if(level>0){
1341
                    level= (dc_bias + level)>>(QUANT_SHIFT-2);
1342
                    block[0]= level;
1343
                }else{
1344
                    level= (dc_bias - level)>>(QUANT_SHIFT-2);
1345
                    block[0]= -level;
1346
                }
1347
//                last_non_zero = i;
1348
            }else{
1349
                block[0]=0;
1350
            }
1351
        }else{
1352
            const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1353
            const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1354
            const unsigned int dc_threshold2= (dc_threshold1<<1);
1355

    
1356
            int level= block[0]*quant_table[0];
1357
            if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1358
                if(level>0){
1359
                    level= (dc_bias + level)>>(QUANT_SHIFT+1);
1360
                    block[0]= level;
1361
                }else{
1362
                    level= (dc_bias - level)>>(QUANT_SHIFT+1);
1363
                    block[0]= -level;
1364
                }
1365
//                last_non_zero = i;
1366
            }else{
1367
                block[0]=0;
1368
            }
1369
        }
1370
        last_non_zero= 0;
1371
        i=1;
1372
    }else{
1373
        last_non_zero= -1;
1374
        i=0;
1375
    }
1376

    
1377
    for(; i<16; i++){
1378
        const int j= scantable[i];
1379
        int level= block[j]*quant_table[j];
1380

    
1381
//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
1382
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
1383
        if(((unsigned)(level+threshold1))>threshold2){
1384
            if(level>0){
1385
                level= (bias + level)>>QUANT_SHIFT;
1386
                block[j]= level;
1387
            }else{
1388
                level= (bias - level)>>QUANT_SHIFT;
1389
                block[j]= -level;
1390
            }
1391
            last_non_zero = i;
1392
        }else{
1393
            block[j]=0;
1394
        }
1395
    }
1396

    
1397
    return last_non_zero;
1398
}
1399

    
1400
/**
 * 4x4 vertical prediction: every row becomes a copy of the 4 pixels above
 * the block. Rows are accessed as 32 bit words.
 * @param topright unused
 */
static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t top_row= *(uint32_t*)(src - stride);
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= top_row;
}
1407

    
1408
/**
 * 4x4 horizontal prediction: every row is filled with the pixel left of it.
 * Rows are written as 32 bit words.
 * @param topright unused
 */
static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
    int y;

    /* unsigned multiply: pixel*0x01010101 exceeds INT_MAX for pixels >= 128 */
    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= src[-1 + y*stride]*0x01010101U;
}
1414

    
1415
/**
 * 4x4 dc prediction: the block is filled with the rounded average of the
 * 4 pixels above and the 4 pixels left of it.
 * @param topright unused
 */
static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    /* unsigned multiply: dc*0x01010101 exceeds INT_MAX for dc >= 128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1424

    
1425
/**
 * 4x4 dc prediction from the left edge only: the block is filled with the
 * rounded average of the 4 pixels left of it.
 * @param topright unused
 */
static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    /* unsigned multiply: dc*0x01010101 exceeds INT_MAX for dc >= 128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1433

    
1434
/**
 * 4x4 dc prediction from the top edge only: the block is filled with the
 * rounded average of the 4 pixels above it.
 * @param topright unused
 */
static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    /* unsigned multiply: dc*0x01010101 exceeds INT_MAX for dc >= 128 */
    const uint32_t fill= dc*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= fill;
}
1442

    
1443
/**
 * 4x4 dc prediction without any neighbour: the block is filled with 128.
 * @param topright unused
 */
static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
    const uint32_t grey= 128U*0x01010101U;
    int y;

    for(y=0; y<4; y++)
        *(uint32_t*)(src + y*stride)= grey;
}
1449

    
1450

    
1451
/*
 * Edge loaders for the 4x4 predictors. Each one expands to declarations and
 * expects src, stride and/or topright to be in scope.
 */

/* t4..t7: the 4 pixels at topright[0..3] */
#define LOAD_TOP_RIGHT_EDGE\
    const int t4= topright[0];\
    const int t5= topright[1];\
    const int t6= topright[2];\
    const int t7= topright[3];

/* l0..l3: the pixels left of rows 0..3 */
#define LOAD_LEFT_EDGE\
    const int l0= src[-1 + 0*stride];\
    const int l1= src[-1 + 1*stride];\
    const int l2= src[-1 + 2*stride];\
    const int l3= src[-1 + 3*stride];

/* t0..t3: the 4 pixels in the row above the block */
#define LOAD_TOP_EDGE\
    const int t0= src[0 - 1*stride];\
    const int t1= src[1 - 1*stride];\
    const int t2= src[2 - 1*stride];\
    const int t3= src[3 - 1*stride];
1468

    
1469
/**
 * 4x4 diagonal down-right prediction.
 * All pixels with the same x - y share one value, a (1,2,1)/4 filter over
 * three consecutive pixels of the edge running from the bottom of the left
 * column over the top-left corner to the end of the top row.
 * @param topright unused
 */
static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
    const uint8_t * const top= src - stride;
    const int lt= top[-1];
    const int t0= top[0], t1= top[1], t2= top[2], t3= top[3];
    const int l0= src[-1 + 0*stride];
    const int l1= src[-1 + 1*stride];
    const int l2= src[-1 + 2*stride];
    const int l3= src[-1 + 3*stride];
    int diag[7]; /* indexed by x - y + 3 */
    int x, y;

    diag[0]= (l3 + 2*l2 + l1 + 2)>>2;
    diag[1]= (l2 + 2*l1 + l0 + 2)>>2;
    diag[2]= (l1 + 2*l0 + lt + 2)>>2;
    diag[3]= (l0 + 2*lt + t0 + 2)>>2;
    diag[4]= (lt + 2*t0 + t1 + 2)>>2;
    diag[5]= (t0 + 2*t1 + t2 + 2)>>2;
    diag[6]= (t1 + 2*t2 + t3 + 2)>>2;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= diag[x - y + 3];
}
1491

    
1492
/**
 * 4x4 diagonal down-left prediction.
 * All pixels with the same x + y share one value, a (1,2,1)/4 filter over
 * three consecutive pixels of the top edge (4 pixels above the block followed
 * by topright[0..3]); the last diagonal uses (t6 + 3*t7 + 2)>>2.
 * @param topright the 4 pixels that continue the top edge to the right
 */
static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
    int t[8];    /* top edge, t[4..7] come from topright */
    int diag[7]; /* indexed by x + y */
    int i, x, y;

    for(i=0; i<4; i++){
        t[i    ]= src[i - stride];
        t[i + 4]= topright[i];
    }

    for(i=0; i<6; i++)
        diag[i]= (t[i] + t[i+2] + 2*t[i+1] + 2)>>2;
    diag[6]= (t[6] + 3*t[7] + 2)>>2;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= diag[x + y];
}
1514

    
1515
/**
 * 4x4 vertical-right prediction.
 * Rows 0 and 1 hold 2-tap averages resp. (1,2,1)/4 filtered values of the
 * top edge (including the top-left corner); rows 2 and 3 repeat rows 0 and 1
 * shifted right by one pixel, with the first column filtered from the left
 * edge.
 * @param topright unused
 */
static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
    const uint8_t * const top= src - stride;
    const int lt= top[-1];
    const int t0= top[0], t1= top[1], t2= top[2], t3= top[3];
    const int l0= src[-1 + 0*stride];
    const int l1= src[-1 + 1*stride];
    const int l2= src[-1 + 2*stride];
    /* 2-tap averages along the top edge */
    const int avg0= (lt + t0 + 1)>>1;
    const int avg1= (t0 + t1 + 1)>>1;
    const int avg2= (t1 + t2 + 1)>>1;
    const int avg3= (t2 + t3 + 1)>>1;
    /* 3-tap filtered values along the top edge */
    const int flt0= (l0 + 2*lt + t0 + 2)>>2;
    const int flt1= (lt + 2*t0 + t1 + 2)>>2;
    const int flt2= (t0 + 2*t1 + t2 + 2)>>2;
    const int flt3= (t1 + 2*t2 + t3 + 2)>>2;
    uint8_t * const row0= src;
    uint8_t * const row1= src +   stride;
    uint8_t * const row2= src + 2*stride;
    uint8_t * const row3= src + 3*stride;

    row0[0]= avg0; row0[1]= avg0 == avg0 ? avg1 : avg1; row0[2]= avg2; row0[3]= avg3;
    row1[0]= flt0; row1[1]= flt1; row1[2]= flt2; row1[3]= flt3;
    row2[0]= (lt + 2*l0 + l1 + 2)>>2;
    row2[1]= avg0; row2[2]= avg1; row2[3]= avg2;
    row3[0]= (l0 + 2*l1 + l2 + 2)>>2;
    row3[1]= flt0; row3[2]= flt1; row3[3]= flt2;
}
1538

    
1539
/**
 * 4x4 vertical-left prediction.
 * Even rows hold 2-tap averages, odd rows (1,2,1)/4 filtered values of the
 * top edge (4 pixels above the block followed by topright[0..2]); rows 2 and
 * 3 start one pixel further right on the edge than rows 0 and 1.
 * @param topright the pixels that continue the top edge to the right
 */
static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
    int t[7];   /* top edge, t[4..6] come from topright */
    int avg[5]; /* (t[k] + t[k+1] + 1)>>1 */
    int flt[5]; /* (t[k] + 2*t[k+1] + t[k+2] + 2)>>2 */
    int k, x, y;

    for(k=0; k<4; k++)
        t[k]= src[k - stride];
    for(k=0; k<3; k++)
        t[k + 4]= topright[k];

    for(k=0; k<5; k++){
        avg[k]= (t[k] + t[k+1] + 1)>>1;
        flt[k]= (t[k] + 2*t[k+1] + t[k+2] + 2)>>2;
    }

    for(y=0; y<4; y++){
        const int * const line= (y&1) ? flt : avg;
        const int shift= y>>1;

        for(x=0; x<4; x++)
            src[x + y*stride]= line[x + shift];
    }
}
1561

    
1562
/**
 * 4x4 horizontal-up prediction.
 * All pixels with the same x + 2*y share one value: alternating 2-tap
 * averages and (1,2,1)/4 filtered values along the left edge, and the last
 * left pixel itself once x + 2*y exceeds 5.
 * @param topright unused
 */
static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
    const int l0= src[-1 + 0*stride];
    const int l1= src[-1 + 1*stride];
    const int l2= src[-1 + 2*stride];
    const int l3= src[-1 + 3*stride];
    int val[10]; /* indexed by x + 2*y */
    int k, x, y;

    val[0]= (l0 + l1 + 1)>>1;
    val[1]= (l0 + 2*l1 + l2 + 2)>>2;
    val[2]= (l1 + l2 + 1)>>1;
    val[3]= (l1 + 2*l2 + l3 + 2)>>2;
    val[4]= (l2 + l3 + 1)>>1;
    val[5]= (l2 + 2*l3 + l3 + 2)>>2;
    for(k=6; k<10; k++)
        val[k]= l3;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= val[x + 2*y];
}
1582
    
1583
/**
 * 4x4 horizontal-down prediction.
 * All pixels with the same 2*y - x share one value: (1,2,1)/4 filtered
 * values of the top edge for 2*y - x < 0, and alternating 2-tap averages and
 * (1,2,1)/4 filtered values running down the left edge (starting at the
 * top-left corner) otherwise.
 * @param topright unused
 */
static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
    const uint8_t * const top= src - stride;
    const int lt= top[-1];
    const int t0= top[0], t1= top[1], t2= top[2];
    const int l0= src[-1 + 0*stride];
    const int l1= src[-1 + 1*stride];
    const int l2= src[-1 + 2*stride];
    const int l3= src[-1 + 3*stride];
    int val[10]; /* indexed by 2*y - x + 3 */
    int x, y;

    val[0]= (t0 + 2*t1 + t2 + 2)>>2;
    val[1]= (lt + 2*t0 + t1 + 2)>>2;
    val[2]= (l0 + 2*lt + t0 + 2)>>2;
    val[3]= (lt + l0 + 1)>>1;
    val[4]= (lt + 2*l0 + l1 + 2)>>2;
    val[5]= (l0 + l1 + 1)>>1;
    val[6]= (l0 + 2*l1 + l2 + 2)>>2;
    val[7]= (l1 + l2+ 1)>>1;
    val[8]= (l1 + 2*l2 + l3 + 2)>>2;
    val[9]= (l2 + l3 + 1)>>1;

    for(y=0; y<4; y++)
        for(x=0; x<4; x++)
            src[x + y*stride]= val[2*y - x + 3];
}
1606

    
1607
/**
 * 16x16 vertical prediction: every row becomes a copy of the 16 pixels above
 * the block. Rows are accessed as four 32 bit words.
 */
static void pred16x16_vertical_c(uint8_t *src, int stride){
    const uint32_t * const top= (const uint32_t*)(src - stride);
    const uint32_t w0= top[0];
    const uint32_t w1= top[1];
    const uint32_t w2= top[2];
    const uint32_t w3= top[3];
    int y;

    for(y=0; y<16; y++){
        uint32_t * const row= (uint32_t*)(src + y*stride);

        row[0]= w0;
        row[1]= w1;
        row[2]= w2;
        row[3]= w3;
    }
}
1621

    
1622
/**
 * 16x16 horizontal prediction: every row of the block is filled with the
 * sample immediately to the left of that row.
 * Rows are written as four 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 16x16 block
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);
        /* unsigned constant: a signed multiply would overflow int for
           samples >= 128 */
        const uint32_t splat= src[-1+i*stride]*0x01010101U;

        row[0]=
        row[1]=
        row[2]=
        row[3]= splat;
    }
}
1632

    
1633
/**
 * 16x16 DC prediction: fills the block with the rounded mean of the 16
 * samples to its left and the 16 samples above it.
 * Rows are written as four 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 16x16 block
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0;i<16; i++){
        sum+= src[-1+i*stride];
    }

    for(i=0;i<16; i++){
        sum+= src[i-stride];
    }

    /* replicate the mean into all four bytes; the multiply is unsigned
       because a signed one overflows int for means >= 128 */
    dc= 0x01010101U*(uint32_t)((sum + 16)>>5);

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1653

    
1654
/**
 * 16x16 DC prediction from the left edge only: fills the block with the
 * rounded mean of the 16 samples to its left.
 * Rows are written as four 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 16x16 block
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_left_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0;i<16; i++){
        sum+= src[-1+i*stride];
    }

    /* unsigned multiply: a signed one overflows int for means >= 128 */
    dc= 0x01010101U*(uint32_t)((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1670

    
1671
/**
 * 16x16 DC prediction from the top edge only: fills the block with the
 * rounded mean of the 16 samples directly above it.
 * Rows are written as four 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 16x16 block
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_top_dc_c(uint8_t *src, int stride){
    int i, sum=0;
    uint32_t dc;

    for(i=0;i<16; i++){
        sum+= src[i-stride];
    }
    /* unsigned multiply: a signed one overflows int for means >= 128 */
    dc= 0x01010101U*(uint32_t)((sum + 8)>>4);

    for(i=0; i<16; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);

        row[0]=
        row[1]=
        row[2]=
        row[3]= dc;
    }
}
1686

    
1687
/**
 * 16x16 DC prediction without neighbours: fills the whole block with the
 * constant value 128.
 * Rows are written as four 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 16x16 block
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey= 0x01010101U*128U;
    int row, word;

    for(row=0; row<16; row++){
        uint32_t * const dst= (uint32_t*)(src + row*stride);

        for(word=0; word<4; word++)
            dst[word]= grey;
    }
}
1697

    
1698
static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
1699
  int i, j, k;
1700
  int a;
1701
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1702
  const uint8_t * const src0 = src+7-stride;
1703
  const uint8_t *src1 = src+8*stride-1;
1704
  const uint8_t *src2 = src1-2*stride;      // == src+6*stride-1;
1705
  int H = src0[1] - src0[-1];
1706
  int V = src1[0] - src2[ 0];
1707
  for(k=2; k<=8; ++k) {
1708
    src1 += stride; src2 -= stride;
1709
    H += k*(src0[k] - src0[-k]);
1710
    V += k*(src1[0] - src2[ 0]);
1711
  }
1712
  if(svq3){
1713
    H = ( 5*(H/4) ) / 16;
1714
    V = ( 5*(V/4) ) / 16;
1715

    
1716
    /* required for 100% accuracy */
1717
    i = H; H = V; V = i;
1718
  }else{
1719
    H = ( 5*H+32 ) >> 6;
1720
    V = ( 5*V+32 ) >> 6;
1721
  }
1722

    
1723
  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
1724
  for(j=16; j>0; --j) {
1725
    int b = a;
1726
    a += V;
1727
    for(i=-16; i<0; i+=4) {
1728
      src[16+i] = cm[ (b    ) >> 5 ];
1729
      src[17+i] = cm[ (b+  H) >> 5 ];
1730
      src[18+i] = cm[ (b+2*H) >> 5 ];
1731
      src[19+i] = cm[ (b+3*H) >> 5 ];
1732
      b += 4*H;
1733
    }
1734
    src += stride;
1735
  }
1736
}
1737

    
1738
/**
 * 16x16 plane prediction with the non-SVQ3 gradient scaling; thin wrapper
 * around pred16x16_plane_compat_c().
 * @param src    top-left sample of the 16x16 block
 * @param stride distance between vertically adjacent samples
 */
static void pred16x16_plane_c(uint8_t *src, int stride){
    const int svq3_mode= 0;

    pred16x16_plane_compat_c(src, stride, svq3_mode);
}
1741

    
1742
/**
 * 8x8 vertical prediction: copies the 8 samples of the row directly above
 * the block into each of the block's 8 rows.
 * Rows are accessed as two 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_vertical_c(uint8_t *src, int stride){
    const uint32_t * const above= (const uint32_t*)(src-stride);
    const uint32_t lo= above[0];
    const uint32_t hi= above[1];
    int row;

    for(row=0; row<8; row++){
        uint32_t * const dst= (uint32_t*)(src + row*stride);

        dst[0]= lo;
        dst[1]= hi;
    }
}
1752

    
1753
/**
 * 8x8 horizontal prediction: every row of the block is filled with the
 * sample immediately to the left of that row.
 * Rows are written as two 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_horizontal_c(uint8_t *src, int stride){
    int i;

    for(i=0; i<8; i++){
        uint32_t * const row= (uint32_t*)(src+i*stride);
        /* unsigned constant: a signed multiply would overflow int for
           samples >= 128 */
        const uint32_t splat= src[-1+i*stride]*0x01010101U;

        row[0]=
        row[1]= splat;
    }
}
1761

    
1762
/**
 * 8x8 DC prediction without neighbours: fills the whole block with the
 * constant value 128.
 * Rows are written as two 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_128_dc_c(uint8_t *src, int stride){
    const uint32_t grey= 0x01010101U*128U;
    int row;

    for(row=0; row<8; row++){
        uint32_t * const dst= (uint32_t*)(src + row*stride);

        dst[0]= grey;
        dst[1]= grey;
    }
}
1774

    
1775
/**
 * 8x8 DC prediction from the left edge only.
 * The upper four rows are filled with the rounded mean of the four left
 * neighbours of rows 0..3, the lower four rows with the rounded mean of
 * the left neighbours of rows 4..7.
 * Rows are written as two 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_left_dc_c(uint8_t *src, int stride){
    int i;
    int sum_upper=0, sum_lower=0;
    uint32_t dc0, dc2;

    for(i=0;i<4; i++){
        sum_upper+= src[-1+i*stride];
        sum_lower+= src[-1+(i+4)*stride];
    }
    /* unsigned multiplies: signed ones overflow int for means >= 128 */
    dc0= 0x01010101U*(uint32_t)((sum_upper + 2)>>2);
    dc2= 0x01010101U*(uint32_t)((sum_lower + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc0;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]=
        ((uint32_t*)(src+i*stride))[1]= dc2;
    }
}
1796

    
1797
/**
 * 8x8 DC prediction from the top edge only.
 * Columns 0..3 of every row are filled with the rounded mean of the four
 * samples above columns 0..3, columns 4..7 with the rounded mean of the
 * four samples above columns 4..7.
 * Rows are written as two 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_top_dc_c(uint8_t *src, int stride){
    int i;
    int sum_left=0, sum_right=0;
    uint32_t dc0, dc1;

    for(i=0;i<4; i++){
        sum_left += src[i-stride];
        sum_right+= src[4+i-stride];
    }
    /* unsigned multiplies: signed ones overflow int for means >= 128 */
    dc0= 0x01010101U*(uint32_t)((sum_left  + 2)>>2);
    dc1= 0x01010101U*(uint32_t)((sum_right + 2)>>2);

    for(i=0; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
}
1818

    
1819

    
1820
/**
 * 8x8 DC prediction using both edges, computed per 4x4 quadrant:
 *  - top-left:     mean of the 4 left neighbours of rows 0..3 and the
 *                  4 samples above columns 0..3
 *  - top-right:    mean of the 4 samples above columns 4..7
 *  - bottom-left:  mean of the 4 left neighbours of rows 4..7
 *  - bottom-right: mean of the 8 samples used by the previous two
 * Rows are written as two 32-bit words, so src and stride must keep
 * every row suitably aligned for uint32_t access.
 * @param src    top-left sample of the 8x8 block
 * @param stride distance between vertically adjacent samples
 */
static void pred8x8_dc_c(uint8_t *src, int stride){
    int i;
    int sum0=0, sum1=0, sum2=0;
    uint32_t dc0, dc1, dc2, dc3;

    for(i=0;i<4; i++){
        sum0+= src[-1+i*stride] + src[i-stride];
        sum1+= src[4+i-stride];
        sum2+= src[-1+(i+4)*stride];
    }
    /* unsigned multiplies: signed ones overflow int for means >= 128 */
    dc3= 0x01010101U*(uint32_t)((sum1 + sum2 + 4)>>3);
    dc0= 0x01010101U*(uint32_t)((sum0 + 4)>>3);
    dc1= 0x01010101U*(uint32_t)((sum1 + 2)>>2);
    dc2= 0x01010101U*(uint32_t)((sum2 + 2)>>2);

    for(i=0; i<4; i++){
        ((uint32_t*)(src+i*stride))[0]= dc0;
        ((uint32_t*)(src+i*stride))[1]= dc1;
    }
    for(i=4; i<8; i++){
        ((uint32_t*)(src+i*stride))[0]= dc2;
        ((uint32_t*)(src+i*stride))[1]= dc3;
    }
}
1844

    
1845
static void pred8x8_plane_c(uint8_t *src, int stride){
1846
  int j, k;
1847
  int a;
1848
  uint8_t *cm = cropTbl + MAX_NEG_CROP;
1849
  const uint8_t * const src0 = src+3-stride;
1850
  const uint8_t *src1 = src+4*stride-1;
1851
  const uint8_t *src2 = src1-2*stride;      // == src+2*stride-1;
1852
  int H = src0[1] - src0[-1];
1853
  int V = src1[0] - src2[ 0];
1854
  for(k=2; k<=4; ++k) {
1855
    src1 += stride; src2 -= stride;
1856
    H += k*(src0[k] - src0[-k]);
1857
    V += k*(src1[0] - src2[ 0]);
1858
  }
1859
  H = ( 17*H+16 ) >> 5;
1860
  V = ( 17*V+16 ) >> 5;
1861

    
1862
  a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
1863
  for(j=8; j>0; --j) {
1864
    int b = a;
1865
    a += V;
1866
    src[0] = cm[ (b    ) >> 5 ];
1867
    src[1] = cm[ (b+  H) >> 5 ];
1868
    src[2] = cm[ (b+2*H) >> 5 ];
1869
    src[3] = cm[ (b+3*H) >> 5 ];
1870
    src[4] = cm[ (b+4*H) >> 5 ];
1871
    src[5] = cm[ (b+5*H) >> 5 ];
1872
    src[6] = cm[ (b+6*H) >> 5 ];
1873
    src[7] = cm[ (b+7*H) >> 5 ];
1874
    src += stride;
1875
  }
1876
}
1877

    
1878
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1879
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1880
                           int src_x_offset, int src_y_offset,
1881
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1882
    MpegEncContext * const s = &h->s;
1883
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1884
    const int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1885
    const int luma_xy= (mx&3) + ((my&3)<<2);
1886
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*s->linesize;
1887
    uint8_t * src_cb= pic->data[1] + (mx>>3) + (my>>3)*s->uvlinesize;
1888
    uint8_t * src_cr= pic->data[2] + (mx>>3) + (my>>3)*s->uvlinesize;
1889
    int extra_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16; //FIXME increase edge?, IMHO not worth it
1890
    int extra_height= extra_width;
1891
    int emu=0;
1892
    const int full_mx= mx>>2;
1893
    const int full_my= my>>2;
1894
    
1895
    assert(pic->data[0]);
1896
    
1897
    if(mx&7) extra_width -= 3;
1898
    if(my&7) extra_height -= 3;
1899
    
1900
    if(   full_mx < 0-extra_width 
1901
       || full_my < 0-extra_height 
1902
       || full_mx + 16/*FIXME*/ > s->width + extra_width 
1903
       || full_my + 16/*FIXME*/ > s->height + extra_height){
1904
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, s->width, s->height);
1905
            src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
1906
        emu=1;
1907
    }
1908
    
1909
    qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
1910
    if(!square){
1911
        qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
1912
    }
1913
    
1914
    if(s->flags&CODEC_FLAG_GRAY) return;
1915
    
1916
    if(emu){
1917
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1918
            src_cb= s->edge_emu_buffer;
1919
    }
1920
    chroma_op(dest_cb, src_cb, s->uvlinesize, chroma_height, mx&7, my&7);
1921

    
1922
    if(emu){
1923
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), s->width>>1, s->height>>1);
1924
            src_cr= s->edge_emu_buffer;
1925
    }
1926
    chroma_op(dest_cr, src_cr, s->uvlinesize, chroma_height, mx&7, my&7);
1927
}
1928

    
1929
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1930
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1931
                           int x_offset, int y_offset,
1932
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1933
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1934
                           int list0, int list1){
1935
    MpegEncContext * const s = &h->s;
1936
    qpel_mc_func *qpix_op=  qpix_put;
1937
    h264_chroma_mc_func chroma_op= chroma_put;
1938
    
1939
    dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
1940
    dest_cb +=   x_offset +   y_offset*s->uvlinesize;
1941
    dest_cr +=   x_offset +   y_offset*s->uvlinesize;
1942
    x_offset += 8*s->mb_x;
1943
    y_offset += 8*s->mb_y;
1944
    
1945
    if(list0){
1946
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1947
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1948
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1949
                           qpix_op, chroma_op);
1950

    
1951
        qpix_op=  qpix_avg;
1952
        chroma_op= chroma_avg;
1953
    }
1954

    
1955
    if(list1){
1956
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1957
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1958
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1959
                           qpix_op, chroma_op);
1960
    }
1961
}
1962

    
1963
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1964
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1965
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg)){
1966
    MpegEncContext * const s = &h->s;
1967
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
1968
    const int mb_type= s->current_picture.mb_type[mb_xy];
1969
    
1970
    assert(IS_INTER(mb_type));
1971
    
1972
    if(IS_16X16(mb_type)){
1973
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1974
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1975
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1976
    }else if(IS_16X8(mb_type)){
1977
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1978
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1979
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1980
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1981
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1982
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1983
    }else if(IS_8X16(mb_type)){
1984
        mc_part(h, 0, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 0, 0,
1985
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1986
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1987
        mc_part(h, 4, 0, 8, 8*s->linesize, dest_y, dest_cb, dest_cr, 4, 0,
1988
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1989
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1990
    }else{
1991
        int i;
1992
        
1993
        assert(IS_8X8(mb_type));
1994

    
1995
        for(i=0; i<4; i++){
1996
            const int sub_mb_type= h->sub_mb_type[i];
1997
            const int n= 4*i;
1998
            int x_offset= (i&1)<<2;
1999
            int y_offset= (i&2)<<1;
2000

    
2001
            if(IS_SUB_8X8(sub_mb_type)){
2002
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2003
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
2004
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2005
            }else if(IS_SUB_8X4(sub_mb_type)){
2006
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2007
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2008
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2009
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
2010
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
2011
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2012
            }else if(IS_SUB_4X8(sub_mb_type)){
2013
                mc_part(h, n  , 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
2014
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2015
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2016
                mc_part(h, n+1, 0, 4, 4*s->linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
2017
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2018
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2019
            }else{
2020
                int j;
2021
                assert(IS_SUB_4X4(sub_mb_type));
2022
                for(j=0; j<4; j++){
2023
                    int sub_x_offset= x_offset + 2*(j&1);
2024
                    int sub_y_offset= y_offset +   (j&2);
2025
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
2026
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
2027
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
2028
                }
2029
            }
2030
        }
2031
    }
2032
}
2033

    
2034
/**
 * Builds the static VLC tables (coefficient tokens, total zeros and run
 * lengths). The work is done only on the first call; a function-local
 * static flag turns later calls into no-ops.
 * @param h not referenced
 */
static void decode_init_vlc(H264Context *h){
    static int done = 0;
    int tab;

    if (done)
        return;
    done = 1;

    init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
             &chroma_dc_coeff_token_len [0], 1, 1,
             &chroma_dc_coeff_token_bits[0], 1, 1);

    for(tab=0; tab<4; tab++){
        init_vlc(&coeff_token_vlc[tab], COEFF_TOKEN_VLC_BITS, 4*17,
                 &coeff_token_len [tab][0], 1, 1,
                 &coeff_token_bits[tab][0], 1, 1);
    }

    for(tab=0; tab<3; tab++){
        init_vlc(&chroma_dc_total_zeros_vlc[tab], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
                 &chroma_dc_total_zeros_len [tab][0], 1, 1,
                 &chroma_dc_total_zeros_bits[tab][0], 1, 1);
    }

    for(tab=0; tab<15; tab++){
        init_vlc(&total_zeros_vlc[tab], TOTAL_ZEROS_VLC_BITS, 16,
                 &total_zeros_len [tab][0], 1, 1,
                 &total_zeros_bits[tab][0], 1, 1);
    }

    for(tab=0; tab<6; tab++){
        init_vlc(&run_vlc[tab], RUN_VLC_BITS, 7,
                 &run_len [tab][0], 1, 1,
                 &run_bits[tab][0], 1, 1);
    }

    /* the seventh run table has 16 codes and its own table size */
    init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
             &run_len [6][0], 1, 1,
             &run_bits[6][0], 1, 1);
}
2072

    
2073
/**
2074
 * Sets the intra prediction function pointers.
2075
 */
2076
static void init_pred_ptrs(H264Context *h){
2077
//    MpegEncContext * const s = &h->s;
2078

    
2079
    h->pred4x4[VERT_PRED           ]= pred4x4_vertical_c;
2080
    h->pred4x4[HOR_PRED            ]= pred4x4_horizontal_c;
2081
    h->pred4x4[DC_PRED             ]= pred4x4_dc_c;
2082
    h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
2083
    h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
2084
    h->pred4x4[VERT_RIGHT_PRED     ]= pred4x4_vertical_right_c;
2085
    h->pred4x4[HOR_DOWN_PRED       ]= pred4x4_horizontal_down_c;
2086
    h->pred4x4[VERT_LEFT_PRED      ]= pred4x4_vertical_left_c;
2087
    h->pred4x4[HOR_UP_PRED         ]= pred4x4_horizontal_up_c;
2088
    h->pred4x4[LEFT_DC_PRED        ]= pred4x4_left_dc_c;
2089
    h->pred4x4[TOP_DC_PRED         ]= pred4x4_top_dc_c;
2090
    h->pred4x4[DC_128_PRED         ]= pred4x4_128_dc_c;
2091

    
2092
    h->pred8x8[DC_PRED8x8     ]= pred8x8_dc_c;
2093
    h->pred8x8[VERT_PRED8x8   ]= pred8x8_vertical_c;
2094
    h->pred8x8[HOR_PRED8x8    ]= pred8x8_horizontal_c;
2095
    h->pred8x8[PLANE_PRED8x8  ]= pred8x8_plane_c;
2096
    h->pred8x8[LEFT_DC_PRED8x8]= pred8x8_left_dc_c;
2097
    h->pred8x8[TOP_DC_PRED8x8 ]= pred8x8_top_dc_c;
2098
    h->pred8x8[DC_128_PRED8x8 ]= pred8x8_128_dc_c;
2099

    
2100
    h->pred16x16[DC_PRED8x8     ]= pred16x16_dc_c;
2101
    h->pred16x16[VERT_PRED8x8   ]= pred16x16_vertical_c;
2102
    h->pred16x16[HOR_PRED8x8    ]= pred16x16_horizontal_c;
2103
    h->pred16x16[PLANE_PRED8x8  ]= pred16x16_plane_c;
2104
    h->pred16x16[LEFT_DC_PRED8x8]= pred16x16_left_dc_c;
2105
    h->pred16x16[TOP_DC_PRED8x8 ]= pred16x16_top_dc_c;
2106
    h->pred16x16[DC_128_PRED8x8 ]= pred16x16_128_dc_c;
2107
}
2108

    
2109
/**
 * Releases every per-macroblock table owned by the context.
 * Each pointer is released through av_freep(); slice_table points into
 * slice_table_base and is therefore only reset to NULL.
 */
static void free_tables(H264Context *h){
    /* macroblock index -> block index maps */
    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);

    /* per-macroblock side information */
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->top_border);

    av_freep(&h->slice_table_base);
    h->slice_table= NULL;
}
2121

    
2122
/**
2123
 * allocates tables.
2124
 * needs widzh/height
2125
 */
2126
static int alloc_tables(H264Context *h){
2127
    MpegEncContext * const s = &h->s;
2128
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2129
    int x,y;
2130

    
2131
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2132

    
2133
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2134
    CHECKED_ALLOCZ(h->slice_table_base  , big_mb_num * sizeof(uint8_t))
2135
    CHECKED_ALLOCZ(h->top_border       , s->mb_width * (16+8+8) * sizeof(uint8_t))
2136

    
2137
    if( h->pps.cabac ) {
2138
        CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2139
        CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2140
    }
2141

    
2142
    memset(h->slice_table_base, -1, big_mb_num  * sizeof(uint8_t));
2143
    h->slice_table= h->slice_table_base + s->mb_stride + 1;
2144

    
2145
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint16_t));
2146
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint16_t));
2147
    for(y=0; y<s->mb_height; y++){
2148
        for(x=0; x<s->mb_width; x++){
2149
            const int mb_xy= x + y*s->mb_stride;
2150
            const int b_xy = 4*x + 4*y*h->b_stride;
2151
            const int b8_xy= 2*x + 2*y*h->b8_stride;
2152
        
2153
            h->mb2b_xy [mb_xy]= b_xy;
2154
            h->mb2b8_xy[mb_xy]= b8_xy;
2155
        }
2156
    }
2157
    
2158
    return 0;
2159
fail:
2160
    free_tables(h);
2161
    return -1;
2162
}
2163

    
2164
/**
 * Initialisation shared by the users of H264Context: copies picture size
 * and codec id from the attached AVCodecContext, sets the
 * unrestricted_mv and decode flags and installs the intra prediction
 * functions. s->avctx must already be set.
 */
static void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx = s->avctx;

    s->width   = avctx->width;
    s->height  = avctx->height;
    s->codec_id= avctx->codec->id;

    s->unrestricted_mv= 1;
    s->decode= 1; //FIXME

    init_pred_ptrs(h);
}
2176

    
2177
/**
 * Decoder init callback: resets the embedded MpegEncContext to its
 * decoding defaults, attaches the codec context, runs the common
 * initialisation, selects the H.264 output format with low delay and
 * YUV 4:2:0 output, and builds the VLC tables.
 * @return always 0
 */
static int decode_init(AVCodecContext *avctx){
    H264Context * const h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    /* common_init() reads s->avctx, so attach it first */
    s->avctx = avctx;
    common_init(h);

    // set defaults
    s->out_format     = FMT_H264;
    s->low_delay      = 1;
    s->workaround_bugs= avctx->workaround_bugs;
    avctx->pix_fmt    = PIX_FMT_YUV420P;

    decode_init_vlc(h);

    return 0;
}
2198

    
2199
/**
 * Per-frame setup: starts the frame in the MpegEncContext and the error
 * resilience code, resets the MMCO index and recomputes the block offset
 * tables, which depend on the line sizes of the current frame.
 */
static void frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int blk;

    MPV_frame_start(s, s->avctx);
    ff_er_frame_start(s);
    h->mmco_index=0;

    assert(s->linesize && s->uvlinesize);

    /* scan8[] positions relative to scan8[0] form a grid that is 8 wide:
       the low 3 bits give the column, the remaining bits the row */
    for(blk=0; blk<16; blk++){
        const int rel= scan8[blk] - scan8[0];
        const int col= rel&7;
        const int row= rel>>3;

        h->block_offset[blk]          = 4*col + 4*s->linesize  *row;
        h->chroma_subblock_offset[blk]= 2*col + 2*s->uvlinesize*row;
    }
    /* entries 16..19 and 20..23 share the same offsets (uvlinesize based) */
    for(blk=0; blk<4; blk++){
        const int rel= scan8[blk] - scan8[0];
        const int chroma_off= 4*(rel&7) + 4*s->uvlinesize*(rel>>3);

        h->block_offset[16+blk]= chroma_off;
        h->block_offset[20+blk]= chroma_off;
    }

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
}
2220

    
2221
/**
 * Saves the right-most column and the bottom row of the given macroblock
 * into h->left_border and h->top_border[s->mb_x].
 * Entry 0 of each left_border section receives the last sample of the
 * previously stored top border row, which is why it is copied before the
 * top border is overwritten.
 * Layout as written here: left_border[0..16] luma, [17..25] cb,
 * [26..34] cr; top_border bytes 0..15 luma, 16..23 cb, 24..31 cr.
 * Chroma is skipped with CODEC_FLAG_GRAY.
 * @param src_y, src_cb, src_cr top-left sample of the macroblock planes
 * @param linesize, uvlinesize  line sizes used to step between rows
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
    MpegEncContext * const s = &h->s;
    uint8_t * const top= h->top_border[s->mb_x];
    int row;

    /* corner first: it comes from the old top border contents */
    h->left_border[0]= top[15];
    for(row=0; row<16; row++){
        h->left_border[row+1]= src_y[15 + row*linesize];
    }

    *(uint64_t*)(top+0)= *(uint64_t*)(src_y +     15*linesize);
    *(uint64_t*)(top+8)= *(uint64_t*)(src_y + 8 + 15*linesize);

    if(s->flags&CODEC_FLAG_GRAY)
        return;

    h->left_border[17  ]= top[16+7];
    h->left_border[17+9]= top[24+7];
    for(row=0; row<8; row++){
        h->left_border[row+1+17  ]= src_cb[7 + row*uvlinesize];
        h->left_border[row+1+17+9]= src_cr[7 + row*uvlinesize];
    }
    *(uint64_t*)(top+16)= *(uint64_t*)(src_cb + 7*uvlinesize);
    *(uint64_t*)(top+24)= *(uint64_t*)(src_cr + 7*uvlinesize);
}
2248

    
2249
/**
 * Exchanges the samples surrounding the given macroblock (the column to
 * its left including the top-left corner, and the row above it) with the
 * contents of h->left_border / h->top_border[s->mb_x].
 * With xchg!=0 the values are swapped; with xchg==0 the picture samples
 * are overwritten with the stored values while the stored values are
 * kept. The second half of the luma top row and both chroma top rows are
 * always swapped. Chroma is skipped with CODEC_FLAG_GRAY.
 * @param src_y, src_cb, src_cr top-left sample of the macroblock planes
 * @param linesize, uvlinesize  line sizes used to step between rows
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
    MpegEncContext * const s = &h->s;
    uint8_t * const top= h->top_border[s->mb_x];
    /* sample diagonally above-left of each plane's macroblock */
    uint8_t * const corner_y = src_y  - (  linesize + 1);
    uint8_t * const corner_cb= src_cb - (uvlinesize + 1);
    uint8_t * const corner_cr= src_cr - (uvlinesize + 1);
    int temp8, row;
    uint64_t temp64;

#define XCHG(a,b,t,xchg)\
t= a;\
if(xchg)\
    a= b;\
b= t;

    for(row=0; row<17; row++){
        XCHG(h->left_border[row], corner_y[row*linesize], temp8, xchg);
    }

    XCHG(*(uint64_t*)(top+0), *(uint64_t*)(corner_y+1), temp64, xchg);
    XCHG(*(uint64_t*)(top+8), *(uint64_t*)(corner_y+9), temp64, 1);

    if(s->flags&CODEC_FLAG_GRAY)
        return;

    for(row=0; row<9; row++){
        XCHG(h->left_border[row+17  ], corner_cb[row*uvlinesize], temp8, xchg);
        XCHG(h->left_border[row+17+9], corner_cr[row*uvlinesize], temp8, xchg);
    }
    XCHG(*(uint64_t*)(top+16), *(uint64_t*)(corner_cb+1), temp64, 1);
    XCHG(*(uint64_t*)(top+24), *(uint64_t*)(corner_cr+1), temp64, 1);
}
2280

    
2281
/**
 * Reconstructs the current macroblock (s->mb_x, s->mb_y) into
 * s->current_picture: intra or inter prediction, followed by adding the
 * inverse-transformed residual stored in h->mb, and finally, if
 * h->deblocking_filter is set, the border backup and filter_mb().
 * Does nothing when s->decode is 0.
 *
 * For field macroblocks the line sizes are doubled and, on odd
 * macroblock rows, the destination pointers are moved up so that they
 * address the second field. The chroma pointers are moved in units of
 * the chroma line size.
 */
static void hl_decode_mb(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    const int mb_xy= mb_x + mb_y*s->mb_stride;
    const int mb_type= s->current_picture.mb_type[mb_xy];
    uint8_t  *dest_y, *dest_cb, *dest_cr;
    int linesize, uvlinesize /*dct_offset*/;
    int i;

    if(!s->decode)
        return;

    dest_y  = s->current_picture.data[0] + (mb_y * 16* s->linesize  ) + mb_x * 16;
    dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
    dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;

    if (h->mb_field_decoding_flag) {
        linesize = s->linesize * 2;
        uvlinesize = s->uvlinesize * 2;
        if(mb_y&1){ //FIXME move out of this func?
            dest_y -= s->linesize*15;
            /* chroma planes have their own line size */
            dest_cb-= s->uvlinesize*7;
            dest_cr-= s->uvlinesize*7;
        }
    } else {
        linesize = s->linesize;
        uvlinesize = s->uvlinesize;
//        dct_offset = s->linesize * 16;
    }

    if(IS_INTRA(mb_type)){
        /* intra prediction needs the unfiltered neighbours, which are
           swapped in from the saved borders and swapped out again below */
        if(h->deblocking_filter)
            xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1);

        if(!(s->flags&CODEC_FLAG_GRAY)){
            h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
            h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
        }

        if(IS_INTRA4x4(mb_type)){
            if(!s->encoding){
                for(i=0; i<16; i++){
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    uint8_t *topright= ptr + 4 - linesize;
                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
                    const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
                    uint32_t tr;

                    if(!topright_avail){
                        /* replicate the last available top sample; the
                           multiply is unsigned to avoid overflowing int */
                        tr= ptr[3 - linesize]*0x01010101U;
                        topright= (uint8_t*) &tr;
                    }else if(i==5 && h->deblocking_filter){
                        /* top right of block 5 lies in the next macroblock's
                           top border, which was not swapped in */
                        tr= *(uint32_t*)h->top_border[mb_x+1];
                        topright= (uint8_t*) &tr;
                    }

                    h->pred4x4[ dir ](ptr, topright, linesize);
                    if(h->non_zero_count_cache[ scan8[i] ]){
                        if(s->codec_id == CODEC_ID_H264)
                            h264_add_idct_c(ptr, h->mb + i*16, linesize);
                        else
                            svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
                    }
                }
            }
        }else{
            h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
            if(s->codec_id == CODEC_ID_H264)
                h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
            else
                svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
        }
        if(h->deblocking_filter)
            xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
    }else if(s->codec_id == CODEC_ID_H264){
        hl_motion(h, dest_y, dest_cb, dest_cr,
                  s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
                  s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab);
    }


    /* luma residual; intra 4x4 blocks already got theirs added above */
    if(!IS_INTRA4x4(mb_type)){
        if(s->codec_id == CODEC_ID_H264){
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, linesize);
                }
            }
        }else{
            for(i=0; i<16; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
                    uint8_t * const ptr= dest_y + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
                }
            }
        }
    }

    /* chroma residual: DC transform first, then the 4x4 blocks of cb
       (16..19) and cr (20..23) */
    if(!(s->flags&CODEC_FLAG_GRAY)){
        chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
        chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
        if(s->codec_id == CODEC_ID_H264){
            for(i=16; i<16+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
                }
            }
            for(i=20; i<20+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cr + h->block_offset[i];
                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
                }
            }
        }else{
            for(i=16; i<16+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                }
            }
            for(i=20; i<20+4; i++){
                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
                    uint8_t * const ptr= dest_cr + h->block_offset[i];
                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
                }
            }
        }
    }
    if(h->deblocking_filter) {
        /* keep the unfiltered border samples for later intra prediction */
        backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
        filter_mb(h, mb_x, mb_y, dest_y, dest_cb, dest_cr);
    }
}
2420

    
2421
/**
2422
 * fills the default_ref_list.
2423
 */
2424
static int fill_default_ref_list(H264Context *h){
2425
    MpegEncContext * const s = &h->s;
2426
    int i;
2427
    Picture sorted_short_ref[16];
2428
    
2429
    if(h->slice_type==B_TYPE){
2430
        int out_i;
2431
        int limit= -1;
2432

    
2433
        for(out_i=0; out_i<h->short_ref_count; out_i++){
2434
            int best_i=-1;
2435
            int best_poc=-1;
2436

    
2437
            for(i=0; i<h->short_ref_count; i++){
2438
                const int poc= h->short_ref[i]->poc;
2439
                if(poc > limit && poc < best_poc){
2440
                    best_poc= poc;
2441
                    best_i= i;
2442
                }
2443
            }
2444
            
2445
            assert(best_i != -1);
2446
            
2447
            limit= best_poc;
2448
            sorted_short_ref[out_i]= *h->short_ref[best_i];
2449
        }
2450
    }
2451

    
2452
    if(s->picture_structure == PICT_FRAME){
2453
        if(h->slice_type==B_TYPE){
2454
            const int current_poc= s->current_picture_ptr->poc;
2455
            int list;
2456

    
2457
            for(list=0; list<2; list++){
2458
                int index=0;
2459

    
2460
                for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++){
2461
                    const int i2= list ? h->short_ref_count - i - 1 : i;
2462
                    const int poc= sorted_short_ref[i2].poc;
2463
                    
2464
                    if(sorted_short_ref[i2].reference != 3) continue; //FIXME refernce field shit
2465

    
2466
                    if((list==1 && poc > current_poc) || (list==0 && poc < current_poc)){
2467
                        h->default_ref_list[list][index  ]= sorted_short_ref[i2];
2468
                        h->default_ref_list[list][index++].pic_id= sorted_short_ref[i2].frame_num;
2469
                    }
2470
                }
2471

    
2472
                for(i=0; i<h->long_ref_count && index < h->ref_count[ list ]; i++){
2473
                    if(h->long_ref[i]->reference != 3) continue;
2474

    
2475
                    h->default_ref_list[ list ][index  ]= *h->long_ref[i];
2476
                    h->default_ref_list[ list ][index++].pic_id= i;;
2477
                }
2478
                
2479
                if(h->long_ref_count > 1 && h->short_ref_count==0){
2480
                    Picture temp= h->default_ref_list[1][0];
2481
                    h->default_ref_list[1][0] = h->default_ref_list[1][1];
2482
                    h->default_ref_list[1][0] = temp;
2483
                }
2484

    
2485
                if(index < h->ref_count[ list ])
2486
                    memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
2487
            }
2488
        }else{
2489
            int index=0;
2490
            for(i=0; i<h->short_ref_count && index < h->ref_count[0]; i++){
2491
                if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
2492
                h->default_ref_list[0][index  ]= *h->short_ref[i];
2493
                h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2494
            }
2495
            for(i=0; i<h->long_ref_count && index < h->ref_count[0]; i++){
2496
                if(h->long_ref[i]->reference != 3) continue;
2497
                h->default_ref_list[0][index  ]= *h->long_ref[i];
2498
                h->default_ref_list[0][index++].pic_id= i;;
2499
            }
2500
            if(index < h->ref_count[0])
2501
                memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
2502
        }
2503
    }else{ //FIELD
2504
        if(h->slice_type==B_TYPE){
2505
        }else{
2506
            //FIXME second field balh
2507
        }
2508
    }
2509
    return 0;
2510
}
2511

    
2512
/**
 * Parses the reference picture list reordering commands of a slice header
 * and applies them.
 *
 * h->ref_list[list] is initialised from h->default_ref_list[list]; every
 * command then moves the addressed picture to the next free position at the
 * front of the list, shifting the entries in between back by one.
 * Nothing is read for I and SI slices, and only list 0 is handled unless the
 * slice is a B slice.
 *
 * @return 0 on success, -1 if the bitstream contains an invalid command
 */
static int decode_ref_pic_list_reordering(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list;

    if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func

    for(list=0; list<2; list++){
        memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);

        if(get_bits1(&s->gb)){ // reordering commands follow for this list
            int pred= h->curr_pic_num;
            int index;

            for(index=0; ; index++){
                int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
                int pic_id;
                int i;

                // The terminator has to be recognised before the overflow
                // check, otherwise a list that reorders exactly ref_count
                // entries would be rejected when its terminator is read.
                if(reordering_of_pic_nums_idc==3)
                    break;

                if(index >= h->ref_count[list]){
                    av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
                    return -1;
                }

                if(reordering_of_pic_nums_idc<3){
                    if(reordering_of_pic_nums_idc<2){
                        // short term picture, addressed by a difference to the
                        // previously predicted picture number
                        const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;

                        if(abs_diff_pic_num >= h->max_pic_num){
                            av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
                            return -1;
                        }

                        if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                        else                                pred+= abs_diff_pic_num;
                        pred &= h->max_pic_num - 1; // wrap around modulo max_pic_num

                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pred && h->ref_list[list][i].long_ref==0)
                                break;
                        }
                    }else{
                        // long term picture, addressed directly by its index
                        pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx

                        for(i= h->ref_count[list]-1; i>=index; i--){
                            if(h->ref_list[list][i].pic_id == pic_id && h->ref_list[list][i].long_ref==1)
                                break;
                        }
                    }

                    if(i < index){
                        // not found among the not yet reordered entries
                        av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
                        memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
                    }else if(i > index){
                        // move entry i to position index, shifting the rest back
                        Picture tmp= h->ref_list[list][i];
                        for(; i>index; i--){
                            h->ref_list[list][i]= h->ref_list[list][i-1];
                        }
                        h->ref_list[list][index]= tmp;
                    }
                }else{
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
                    return -1;
                }
            }
        }

        if(h->slice_type!=B_TYPE) break;
    }
    return 0;
}
2585

    
2586
/**
 * Parses the prediction weight table of a slice header.
 *
 * Reads the luma and chroma log2 weight denominators followed by, for every
 * active reference of list 0 (and of list 1 for B slices), optional explicit
 * luma and chroma weight/offset pairs. References without explicit values
 * get the default weight of 1<<denominator and an offset of 0, so that no
 * values from an earlier slice survive.
 *
 * @return 0 on success, -1 if a weight denominator is out of range
 */
static int pred_weight_table(H264Context *h){
    MpegEncContext * const s = &h->s;
    int list, i;
    const int luma_denom=   get_ue_golomb(&s->gb);
    const int chroma_denom= get_ue_golomb(&s->gb);

    // H.264 limits both denominators to 0..7; rejecting anything else also
    // keeps the shifts below well defined
    if(luma_denom < 0 || luma_denom > 7 || chroma_denom < 0 || chroma_denom > 7){
        av_log(h->s.avctx, AV_LOG_ERROR, "weight denominator out of range\n");
        return -1;
    }

    h->luma_log2_weight_denom= luma_denom;
    h->chroma_log2_weight_denom= chroma_denom;

    for(list=0; list<2; list++){
        for(i=0; i<h->ref_count[list]; i++){
            int luma_weight_flag, chroma_weight_flag;
            int j;

            luma_weight_flag= get_bits1(&s->gb);
            if(luma_weight_flag){
                h->luma_weight[list][i]= get_se_golomb(&s->gb);
                h->luma_offset[list][i]= get_se_golomb(&s->gb);
            }else{
                h->luma_weight[list][i]= 1<<luma_denom;
                h->luma_offset[list][i]= 0;
            }

            chroma_weight_flag= get_bits1(&s->gb);
            for(j=0; j<2; j++){
                if(chroma_weight_flag){
                    h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
                    h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
                }else{
                    h->chroma_weight[list][i][j]= 1<<chroma_denom;
                    h->chroma_offset[list][i][j]= 0;
                }
            }
        }
        if(h->slice_type != B_TYPE) break; // list 1 only exists for B slices
    }
    return 0;
}
2616

    
2617
/**
2618
 * instantaneos decoder refresh.
2619
 */
2620
static void idr(H264Context *h){
2621
    int i;
2622

    
2623
    for(i=0; i<h->long_ref_count; i++){
2624
        h->long_ref[i]->reference=0;
2625
        h->long_ref[i]= NULL;
2626
    }
2627
    h->long_ref_count=0;
2628

    
2629
    for(i=0; i<h->short_ref_count; i++){
2630
        h->short_ref[i]->reference=0;
2631
        h->short_ref[i]= NULL;
2632
    }
2633
    h->short_ref_count=0;
2634
}
2635

    
2636
/**
2637
 *
2638
 * @return the removed picture or NULL if an error occures
2639
 */
2640
static Picture * remove_short(H264Context *h, int frame_num){
2641
    MpegEncContext * const s = &h->s;
2642
    int i;
2643
    
2644
    if(s->avctx->debug&FF_DEBUG_MMCO)
2645
        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
2646
    
2647
    for(i=0; i<h->short_ref_count; i++){
2648
        Picture *pic= h->short_ref[i];
2649
        if(s->avctx->debug&FF_DEBUG_MMCO)
2650
            av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
2651
        if(pic->frame_num == frame_num){
2652
            h->short_ref[i]= NULL;
2653
            memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
2654
            h->short_ref_count--;
2655
            return pic;
2656
        }
2657
    }
2658
    return NULL;
2659
}
2660

    
2661
/**
2662
 *
2663
 * @return the removed picture or NULL if an error occures
2664
 */
2665
static Picture * remove_long(H264Context *h, int i){
2666
    Picture *pic;
2667

    
2668
    if(i >= h->long_ref_count) return NULL;
2669
    pic= h->long_ref[i];
2670
    if(pic==NULL) return NULL;
2671
    
2672
    h->long_ref[i]= NULL;
2673
    memmove(&h->long_ref[i], &h->long_ref[i+1], (h->long_ref_count - i - 1)*sizeof(Picture*));
2674
    h->long_ref_count--;
2675

    
2676
    return pic;
2677
}
2678

    
2679
/**
2680
 * Executes the reference picture marking (memory management control operations).
2681
 */
2682
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
2683
    MpegEncContext * const s = &h->s;
2684
    int i;
2685
    int current_is_long=0;
2686
    Picture *pic;
2687
    
2688
    if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
2689
        av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
2690
        
2691
    for(i=0; i<mmco_count; i++){
2692
        if(s->avctx->debug&FF_DEBUG_MMCO)
2693
            av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
2694

    
2695
        switch(mmco[i].opcode){
2696
        case MMCO_SHORT2UNUSED:
2697
            pic= remove_short(h, mmco[i].short_frame_num);
2698
            if(pic==NULL) return -1;
2699
            pic->reference= 0;
2700
            break;
2701
        case MMCO_SHORT2LONG:
2702
            pic= remove_long(h, mmco[i].long_index);
2703
            if(pic) pic->reference=0;
2704
            
2705
            h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
2706
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2707
            break;
2708
        case MMCO_LONG2UNUSED:
2709
            pic= remove_long(h, mmco[i].long_index);
2710
            if(pic==NULL) return -1;
2711
            pic->reference= 0;
2712
            break;
2713
        case MMCO_LONG:
2714
            pic= remove_long(h, mmco[i].long_index);
2715
            if(pic) pic->reference=0;
2716
            
2717
            h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
2718
            h->long_ref[ mmco[i].long_index ]->long_ref=1;
2719
            h->long_ref_count++;
2720
            
2721
            current_is_long=1;
2722
            break;
2723
        case MMCO_SET_MAX_LONG:
2724
            assert(mmco[i].long_index <= 16);
2725
            while(mmco[i].long_index < h->long_ref_count){
2726
                pic= remove_long(h, mmco[i].long_index);
2727
                pic->reference=0;
2728
            }
2729
            while(mmco[i].long_index > h->long_ref_count){
2730
                h->long_ref[ h->long_ref_count++ ]= NULL;
2731
            }
2732
            break;
2733
        case MMCO_RESET:
2734
            while(h->short_ref_count){
2735
                pic= remove_short(h, h->short_ref[0]->frame_num);
2736
                pic->reference=0;
2737
            }
2738
            while(h->long_ref_count){
2739
                pic= remove_long(h, h->long_ref_count-1);
2740
                pic->reference=0;
2741
            }
2742
            break;
2743
        default: assert(0);
2744
        }
2745
    }
2746
    
2747
    if(!current_is_long){
2748
        pic= remove_short(h, s->current_picture_ptr->frame_num);
2749
        if(pic){
2750
            pic->reference=0;
2751
            av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
2752
        }
2753
        
2754
        if(h->short_ref_count)
2755
            memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
2756

    
2757
        h->short_ref[0]= s->current_picture_ptr;
2758
        h->short_ref[0]->long_ref=0;
2759
        h->short_ref_count++;
2760
    }
2761
    
2762
    return 0; 
2763
}
2764

    
2765
/**
 * Parses the reference picture marking part of a slice header into h->mmco
 * and sets h->mmco_index to the number of operations stored.
 *
 * - IDR slices: two flags are read; if the second one is set, a single
 *   MMCO_LONG operation with long index 0 is stored.
 * - otherwise, if the adaptive marking flag is set, the explicit list of
 *   operations is read until its end marker.
 * - otherwise (sliding window), one MMCO_SHORT2UNUSED for the last entry of
 *   h->short_ref is generated once the reference buffer is full.
 *
 * @return 0 on success, -1 on an invalid operation or long term index
 */
static int decode_ref_pic_marking(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;

    if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
        s->broken_link= get_bits1(&s->gb) -1;
        h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
        if(h->mmco[0].long_index == -1)
            h->mmco_index= 0;
        else{
            h->mmco[0].opcode= MMCO_LONG;
            h->mmco_index= 1;
        }
    }else{
        if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
            // always start with an empty list; continuing at the previous
            // h->mmco_index would keep operations of an earlier slice
            for(i= 0; i<MAX_MMCO_COUNT; i++) {
                MMCOOpcode opcode= get_ue_golomb(&s->gb);

                h->mmco[i].opcode= opcode;
                if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
                    // coded as a difference to the current frame_num, wrapped to log2_max_frame_num bits
                    h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(&s->gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
                }
                if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
                    h->mmco[i].long_index= get_ue_golomb(&s->gb);
                    if(h->mmco[i].long_index >= 16){
                        av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
                        return -1;
                    }
                }

                if(opcode > MMCO_LONG){
                    av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
                    return -1;
                }

                // memory_management_control_operation 0 terminates the list;
                // it is not counted as an operation to execute
                if(opcode == 0)
                    break;
            }
            h->mmco_index= i;
        }else{
            assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);

            // sliding window: drop the last short term reference once the
            // buffer is full; nothing can be dropped if there is none
            if(h->short_ref_count > 0 && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
                h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
                h->mmco_index= 1;
            }else
                h->mmco_index= 0;
        }
    }

    return 0;
}
2819

    
2820
/**
 * Computes the picture order count (POC) of the current picture from the
 * already parsed slice header fields and stores it in
 * s->current_picture_ptr (field_poc[] and, for frame pictures, poc).
 * Also updates h->frame_num_offset and, for POC type 0, h->poc_msb.
 *
 * @return always 0
 */
static int init_poc(H264Context *h){
    MpegEncContext * const s = &h->s;
    Picture * const cur= s->current_picture_ptr;
    const int frame_num_wrap= 1<<h->sps.log2_max_frame_num;
    int top_poc, bottom_poc;

    /* frame_num_offset grows by one full range each time frame_num wraps */
    if(h->nal_unit_type == NAL_IDR_SLICE)
        h->frame_num_offset= 0;
    else if(h->frame_num < h->prev_frame_num)
        h->frame_num_offset= h->prev_frame_num_offset + frame_num_wrap;
    else
        h->frame_num_offset= h->prev_frame_num_offset;

    switch(h->sps.poc_type){
    case 0:{
        /* POC is coded as LSBs, the MSBs follow from wrap around detection */
        const int lsb_range= 1<<h->sps.log2_max_poc_lsb;

        if     (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= lsb_range/2)
            h->poc_msb = h->prev_poc_msb + lsb_range;
        else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -lsb_range/2)
            h->poc_msb = h->prev_poc_msb - lsb_range;
        else
            h->poc_msb = h->prev_poc_msb;

        top_poc= h->poc_msb + h->poc_lsb;
        bottom_poc= top_poc;
        if(s->picture_structure == PICT_FRAME)
            bottom_poc += h->delta_poc_bottom;
        break;
    }
    case 1:{
        /* POC is predicted from frame_num through a cycle of offsets */
        int abs_num= h->sps.poc_cycle_length != 0 ? h->frame_num_offset + h->frame_num : 0;
        int cycle_delta= 0; //FIXME integrate during sps parse
        int expected= 0;
        int k;

        if(h->nal_ref_idc==0 && abs_num > 0)
            abs_num--;

        for(k=0; k < h->sps.poc_cycle_length; k++)
            cycle_delta += h->sps.offset_for_ref_frame[ k ];

        if(abs_num > 0){
            const int full_cycles=   (abs_num - 1) / h->sps.poc_cycle_length;
            const int last_in_cycle= (abs_num - 1) % h->sps.poc_cycle_length;

            expected= full_cycles * cycle_delta;
            for(k = 0; k <= last_in_cycle; k++)
                expected += h->sps.offset_for_ref_frame[ k ];
        }

        if(h->nal_ref_idc == 0)
            expected += h->sps.offset_for_non_ref_pic;

        top_poc=    expected + h->delta_poc[0];
        bottom_poc= top_poc + h->sps.offset_for_top_to_bottom_field;
        if(s->picture_structure == PICT_FRAME)
            bottom_poc += h->delta_poc[1];
        break;
    }
    default:{
        /* POC follows directly from frame_num */
        int poc= 0;

        if(h->nal_unit_type != NAL_IDR_SLICE){
            poc= 2*(h->frame_num_offset + h->frame_num);
            if(!h->nal_ref_idc)
                poc--;
        }
        top_poc= poc;
        bottom_poc= poc;
        break;
    }
    }

    /* only the field(s) covered by the current picture structure are updated */
    if(s->picture_structure != PICT_BOTTOM_FIELD)
        cur->field_poc[0]= top_poc;
    if(s->picture_structure != PICT_TOP_FIELD)
        cur->field_poc[1]= bottom_poc;
    if(s->picture_structure == PICT_FRAME) // FIXME field pix?
        cur->poc= top_poc > bottom_poc ? bottom_poc : top_poc;

    return 0;
}
2903

    
2904
/**
2905
 * decodes a slice header.
2906
 * this will allso call MPV_common_init() and frame_start() as needed
2907
 */
2908
static int decode_slice_header(H264Context *h){
2909
    MpegEncContext * const s = &h->s;
2910
    int first_mb_in_slice, pps_id;
2911
    int num_ref_idx_active_override_flag;
2912
    static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
2913

    
2914
    s->current_picture.reference= h->nal_ref_idc != 0;
2915

    
2916
    first_mb_in_slice= get_ue_golomb(&s->gb);
2917

    
2918
    h->slice_type= get_ue_golomb(&s->gb);
2919
    if(h->slice_type > 9){
2920
        av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
2921
    }
2922
    if(h->slice_type > 4){
2923
        h->slice_type -= 5;
2924
        h->slice_type_fixed=1;
2925
    }else
2926
        h->slice_type_fixed=0;
2927
    
2928
    h->slice_type= slice_type_map[ h->slice_type ];
2929
    
2930
    s->pict_type= h->slice_type; // to make a few old func happy, its wrong though
2931
        
2932
    pps_id= get_ue_golomb(&s->gb);
2933
    if(pps_id>255){
2934
        av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
2935
        return -1;
2936
    }
2937
    h->pps= h->pps_buffer[pps_id];
2938
    if(h->pps.slice_group_count == 0){
2939
        av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
2940
        return -1;
2941
    }
2942

    
2943
    h->sps= h->sps_buffer[ h->pps.sps_id ];
2944
    if(h->sps.log2_max_frame_num == 0){
2945
        av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
2946
        return -1;
2947
    }
2948
    
2949
    s->mb_width= h->sps.mb_width;
2950
    s->mb_height= h->sps.mb_height;
2951
    
2952
    h->b_stride=  s->mb_width*4;
2953
    h->b8_stride= s->mb_width*2;
2954

    
2955
    s->mb_x = first_mb_in_slice % s->mb_width;
2956
    s->mb_y = first_mb_in_slice / s->mb_width; //FIXME AFFW
2957
    
2958
    s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
2959
    if(h->sps.frame_mbs_only_flag)
2960
        s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
2961
    else
2962
        s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
2963
    
2964
    if (s->context_initialized 
2965
        && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
2966
        free_tables(h);
2967
        MPV_common_end(s);
2968
    }
2969
    if (!s->context_initialized) {
2970
        if (MPV_common_init(s) < 0)
2971
            return -1;
2972

    
2973
        alloc_tables(h);
2974

    
2975
        s->avctx->width = s->width;
2976
        s->avctx->height = s->height;
2977
        s->avctx->sample_aspect_ratio= h->sps.sar;
2978
    }
2979

    
2980
    if(first_mb_in_slice == 0){
2981
        frame_start(h);
2982
    }
2983

    
2984
    s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
2985
    h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
2986

    
2987
    if(h->sps.frame_mbs_only_flag){
2988
        s->picture_structure= PICT_FRAME;
2989
    }else{
2990
        if(get_bits1(&s->gb)) //field_pic_flag
2991
            s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
2992
        else
2993
            s->picture_structure= PICT_FRAME;
2994
    }
2995

    
2996
    if(s->picture_structure==PICT_FRAME){
2997
        h->curr_pic_num=   h->frame_num;
2998
        h->max_pic_num= 1<< h->sps.log2_max_frame_num;
2999
    }else{
3000
        h->curr_pic_num= 2*h->frame_num;
3001
        h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
3002
    }
3003
        
3004
    if(h->nal_unit_type == NAL_IDR_SLICE){
3005
        get_ue_golomb(&s->gb); /* idr_pic_id */
3006
    }
3007
   
3008
    if(h->sps.poc_type==0){
3009
        h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
3010
        
3011
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
3012
            h->delta_poc_bottom= get_se_golomb(&s->gb);
3013
        }
3014
    }
3015
    
3016
    if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
3017
        h->delta_poc[0]= get_se_golomb(&s->gb);
3018
        
3019
        if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
3020
            h->delta_poc[1]= get_se_golomb(&s->gb);
3021
    }
3022
    
3023
    init_poc(h);
3024
    
3025
    if(h->pps.redundant_pic_cnt_present){
3026
        h->redundant_pic_count= get_ue_golomb(&s->gb);
3027
    }
3028

    
3029
    //set defaults, might be overriden a few line later
3030
    h->ref_count[0]= h->pps.ref_count[0];
3031
    h->ref_count[1]= h->pps.ref_count[1];
3032

    
3033
    if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
3034
        if(h->slice_type == B_TYPE){
3035
            h->direct_spatial_mv_pred= get_bits1(&s->gb);
3036
        }
3037
        num_ref_idx_active_override_flag= get_bits1(&s->gb);
3038
    
3039
        if(num_ref_idx_active_override_flag){
3040
            h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
3041
            if(h->slice_type==B_TYPE)
3042
                h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
3043

    
3044
            if(h->ref_count[0] > 32 || h->ref_count[1] > 32){
3045
                av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
3046
                return -1;
3047
            }
3048
        }
3049
    }
3050

    
3051
    if(first_mb_in_slice == 0){
3052
        fill_default_ref_list(h);
3053
    }
3054

    
3055
    decode_ref_pic_list_reordering(h);
3056

    
3057
    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) 
3058
       || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
3059
        pred_weight_table(h);
3060
    
3061
    if(s->current_picture.reference)
3062
        decode_ref_pic_marking(h);
3063

    
3064
    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac )
3065
        h->cabac_init_idc = get_ue_golomb(&s->gb);
3066

    
3067
    h->last_qscale_diff = 0;
3068
    s->qscale = h->pps.init_qp + get_se_golomb(&s->gb);
3069
    if(s->qscale<0 || s->qscale>51){
3070
        av_log(s->avctx, AV_LOG_ERROR, "QP %d out of range\n", s->qscale);
3071
        return -1;
3072
    }
3073
    //FIXME qscale / qp ... stuff
3074
    if(h->slice_type == SP_TYPE){
3075
        get_bits1(&s->gb); /* sp_for_switch_flag */
3076
    }
3077
    if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
3078
        get_se_golomb(&s->gb); /* slice_qs_delta */
3079
    }
3080

    
3081
    h->deblocking_filter = 1;
3082
    h->slice_alpha_c0_offset = 0;
3083
    h->slice_beta_offset = 0;
3084
    if( h->pps.deblocking_filter_parameters_present ) {
3085
        h->deblocking_filter= get_ue_golomb(&s->gb);
3086
        if(h->deblocking_filter < 2) 
3087
            h->deblocking_filter^= 1; // 1<->0
3088

    
3089
        if( h->deblocking_filter ) {
3090
            h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
3091
            h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
3092
        }
3093
    }
3094

    
3095
#if 0 //FMO
3096
    if( h->pps.num_slice_groups > 1  && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
3097
        slice_group_change_cycle= get_bits(&s->gb, ?);
3098
#endif
3099

    
3100
    if(s->avctx->debug&FF_DEBUG_PICT_INFO){
3101
        av_log(h->s.avctx, AV_LOG_DEBUG, "mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d\n", 
3102
               first_mb_in_slice, 
3103
               av_get_pict_type_char(h->slice_type),
3104
               pps_id, h->frame_num,
3105
               s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
3106
               h->ref_count[0], h->ref_count[1],
3107
               s->qscale,
3108
               h->deblocking_filter
3109
               );
3110
    }
3111

    
3112
    return 0;
3113
}
3114

    
3115
/**
3116
 *
3117
 */
3118
static inline int get_level_prefix(GetBitContext *gb){
3119
    unsigned int buf;
3120
    int log;
3121
    
3122
    OPEN_READER(re, gb);
3123
    UPDATE_CACHE(re, gb);
3124
    buf=GET_CACHE(re, gb);
3125
    
3126
    log= 32 - av_log2(buf);
3127
#ifdef TRACE
3128
    print_bin(buf>>(32-log), log);
3129
    printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
3130
#endif
3131

    
3132
    LAST_SKIP_BITS(re, gb, log);
3133
    CLOSE_READER(re, gb);
3134

    
3135
    return log-1;
3136
}
3137

    
3138
/**
3139
 * decodes a residual block.
3140
 * @param n block index
3141
 * @param scantable scantable
3142
 * @param max_coeff number of coefficients in the block
3143
 * @return <0 if an error occurred
3144
 */
3145
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, int qp, int max_coeff){
3146
    MpegEncContext * const s = &h->s;
3147
    const uint16_t *qmul= dequant_coeff[qp];
3148
    static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
3149
    int level[16], run[16];
3150
    int suffix_length, zeros_left, coeff_num, coeff_token, total_coeff, i, trailing_ones;
3151

    
3152
    //FIXME put trailing_onex into the context
3153

    
3154
    if(n == CHROMA_DC_BLOCK_INDEX){
3155
        coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
3156
        total_coeff= coeff_token>>2;
3157
    }else{    
3158
        if(n == LUMA_DC_BLOCK_INDEX){
3159
            total_coeff= pred_non_zero_count(h, 0);
3160
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3161
            total_coeff= coeff_token>>2;
3162
        }else{
3163
            total_coeff= pred_non_zero_count(h, n);
3164
            coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
3165
            total_coeff= coeff_token>>2;
3166
            h->non_zero_count_cache[ scan8[n] ]= total_coeff;
3167
        }
3168
    }
3169

    
3170
    //FIXME set last_non_zero?
3171

    
3172
    if(total_coeff==0)
3173
        return 0;
3174
        
3175
    trailing_ones= coeff_token&3;
3176
    tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
3177
    assert(total_coeff<=16);
3178
    
3179
    for(i=0; i<trailing_ones; i++){
3180
        level[i]= 1 - 2*get_bits1(gb);
3181
    }
3182

    
3183
    suffix_length= total_coeff > 10 && trailing_ones < 3;
3184

    
3185
    for(; i<total_coeff; i++){
3186
        const int prefix= get_level_prefix(gb);
3187
        int level_code, mask;
3188

    
3189
        if(prefix<14){ //FIXME try to build a large unified VLC table for all this
3190
            if(suffix_length)
3191
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3192
            else
3193
                level_code= (prefix<<suffix_length); //part
3194
        }else if(prefix==14){
3195
            if(suffix_length)
3196
                level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
3197
            else
3198
                level_code= prefix + get_bits(gb, 4); //part
3199
        }else if(prefix==15){
3200
            level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
3201
            if(suffix_length==0) level_code+=15; //FIXME doesnt make (much)sense
3202
        }else{
3203
            av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
3204
            return -1;
3205
        }
3206

    
3207
        if(i==trailing_ones && i<3) level_code+= 2; //FIXME split first iteration
3208

    
3209
        mask= -(level_code&1);
3210
        level[i]= (((2+level_code)>>1) ^ mask) - mask;
3211

    
3212
        if(suffix_length==0) suffix_length=1; //FIXME split first iteration
3213

    
3214
#if 1
3215
        if(ABS(level[i]) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3216
#else        
3217
        if((2+level_code)>>1) > (3<<(suffix_length-1)) && suffix_length<6) suffix_length++;
3218
        /* ? == prefix > 2 or sth */
3219
#endif
3220
        tprintf("level: %d suffix_length:%d\n", level[i], suffix_length);
3221
    }
3222

    
3223
    if(total_coeff == max_coeff)
3224
        zeros_left=0;
3225
    else{
3226
        if(n == CHROMA_DC_BLOCK_INDEX)
3227
            zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
3228
        else
3229
            zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
3230
    }
3231
    
3232
    for(i=0; i<total_coeff-1; i++){
3233
        if(zeros_left <=0)
3234
            break;
3235
        else if(zeros_left < 7){
3236
            run[i]= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
3237
        }else{
3238
            run[i]= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
3239
        }
3240
        zeros_left -= run[i];
3241
    }
3242

    
3243
    if(zeros_left<0){
3244
        av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
3245
        return -1;
3246
    }
3247
    
3248
    for(; i<total_coeff-1; i++){
3249
        run[i]= 0;
3250
    }
3251

    
3252
    run[i]= zeros_left;
3253

    
3254
    coeff_num=-1;
3255
    if(n > 24){
3256
        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into rundecode?
3257
            int j;
3258

    
3259
            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
3260
            j= scantable[ coeff_num ];
3261

    
3262
            block[j]= level[i];
3263
        }
3264
    }else{
3265
        for(i=total_coeff-1; i>=0; i--){ //FIXME merge into  rundecode?
3266
            int j;
3267

    
3268
            coeff_num += run[i] + 1; //FIXME add 1 earlier ?
3269
            j= scantable[ coeff_num ];
3270

    
3271
            block[j]= level[i] * qmul[j];
3272
//            printf("%d %d  ", block[j], qmul[j]);
3273
        }
3274
    }
3275
    return 0;
3276
}
3277

    
3278
/**
3279
 * decodes a macroblock
3280
 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
3281
 */
3282
static int decode_mb_cavlc(H264Context *h){
3283
    MpegEncContext * const s = &h->s;
3284
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3285
    int mb_type, partition_count, cbp;
3286

    
3287
    s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong?    
3288

    
3289
    tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
3290
    cbp = 0; /* avoid warning. FIXME: find a solution without slowing
3291
                down the code */
3292
    if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
3293
        if(s->mb_skip_run==-1)
3294
            s->mb_skip_run= get_ue_golomb(&s->gb);
3295
        
3296
        if (s->mb_skip_run--) {
3297
            int mx, my;
3298
            /* skip mb */
3299
//FIXME b frame
3300
            mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0;
3301

    
3302
            memset(h->non_zero_count[mb_xy], 0, 16);
3303
            memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
3304

    
3305
            if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){
3306
                h->mb_field_decoding_flag= get_bits1(&s->gb);
3307
            }
3308

    
3309
            if(h->mb_field_decoding_flag)
3310
                mb_type|= MB_TYPE_INTERLACED;
3311
            
3312
            fill_caches(h, mb_type); //FIXME check what is needed and what not ...
3313
            pred_pskip_motion(h, &mx, &my);
3314
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
3315
            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
3316
            write_back_motion(h, mb_type);
3317

    
3318
            s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type
3319
            s->current_picture.qscale_table[mb_xy]= s->qscale;
3320
            h->slice_table[ mb_xy ]= h->slice_num;
3321

    
3322
            h->prev_mb_skiped= 1;
3323
            return 0;
3324
        }
3325
    }
3326
    if(h->sps.mb_aff /* && !field pic FIXME needed? */){
3327
        if((s->mb_y&1)==0)
3328
            h->mb_field_decoding_flag = get_bits1(&s->gb);
3329
    }else
3330
        h->mb_field_decoding_flag=0; //FIXME som ed note ?!
3331
    
3332
    h->prev_mb_skiped= 0;
3333
    
3334
    mb_type= get_ue_golomb(&s->gb);
3335
    if(h->slice_type == B_TYPE){
3336
        if(mb_type < 23){
3337
            partition_count= b_mb_type_info[mb_type].partition_count;
3338
            mb_type=         b_mb_type_info[mb_type].type;
3339
        }else{
3340
            mb_type -= 23;
3341
            goto decode_intra_mb;
3342
        }
3343
    }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
3344
        if(mb_type < 5){
3345
            partition_count= p_mb_type_info[mb_type].partition_count;
3346
            mb_type=         p_mb_type_info[mb_type].type;
3347
        }else{
3348
            mb_type -= 5;
3349
            goto decode_intra_mb;
3350
        }
3351
    }else{
3352
       assert(h->slice_type == I_TYPE);
3353
decode_intra_mb:
3354
        if(mb_type > 25){
3355
            av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice to large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
3356
            return -1;
3357
        }
3358
        partition_count=0;
3359
        cbp= i_mb_type_info[mb_type].cbp;
3360
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
3361
        mb_type= i_mb_type_info[mb_type].type;
3362
    }
3363

    
3364
    if(h->mb_field_decoding_flag)
3365
        mb_type |= MB_TYPE_INTERLACED;
3366

    
3367
    s->current_picture.mb_type[mb_xy]= mb_type;
3368
    h->slice_table[ mb_xy ]= h->slice_num;
3369
    
3370
    if(IS_INTRA_PCM(mb_type)){
3371
        const uint8_t *ptr;
3372
        int x, y;
3373
        
3374
        // we assume these blocks are very rare so we dont optimize it
3375
        align_get_bits(&s->gb);
3376
        
3377
        ptr= s->gb.buffer + get_bits_count(&s->gb);
3378
    
3379
        for(y=0; y<16; y++){
3380
            const int index= 4*(y&3) + 64*(y>>2);
3381
            for(x=0; x<16; x++){
3382
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3383
            }
3384
        }
3385
        for(y=0; y<8; y++){
3386
            const int index= 256 + 4*(y&3) + 32*(y>>2);
3387
            for(x=0; x<8; x++){
3388
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3389
            }
3390
        }
3391
        for(y=0; y<8; y++){
3392
            const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
3393
            for(x=0; x<8; x++){
3394
                h->mb[index + (x&3) + 16*(x>>2)]= *(ptr++);
3395
            }
3396
        }
3397
    
3398
        skip_bits(&s->gb, 384); //FIXME check /fix the bitstream readers
3399
        
3400
        //FIXME deblock filter, non_zero_count_cache init ...
3401
        memset(h->non_zero_count[mb_xy], 16, 16);
3402
        s->current_picture.qscale_table[mb_xy]= s->qscale;
3403
        
3404
        return 0;
3405
    }
3406
        
3407
    fill_caches(h, mb_type);
3408

    
3409
    //mb_pred
3410
    if(IS_INTRA(mb_type)){
3411
//            init_top_left_availability(h);
3412
            if(IS_INTRA4x4(mb_type)){
3413
                int i;
3414

    
3415
//                fill_intra4x4_pred_table(h);
3416
                for(i=0; i<16; i++){
3417
                    const int mode_coded= !get_bits1(&s->gb);
3418
                    const int predicted_mode=  pred_intra_mode(h, i);
3419
                    int mode;
3420

    
3421
                    if(mode_coded){
3422
                        const int rem_mode= get_bits(&s->gb, 3);
3423
                        if(rem_mode<predicted_mode)
3424
                            mode= rem_mode;
3425
                        else
3426
                            mode= rem_mode + 1;
3427
                    }else{
3428
                        mode= predicted_mode;
3429
                    }
3430
                    
3431
                    h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
3432
                }
3433
                write_back_intra_pred_mode(h);
3434
                if( check_intra4x4_pred_mode(h) < 0)
3435
                    return -1;
3436
            }else{
3437
                h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
3438
                if(h->intra16x16_pred_mode < 0)
3439
                    return -1;
3440
            }
3441
            h->chroma_pred_mode= get_ue_golomb(&s->gb);
3442

    
3443
            h->chroma_pred_mode= check_intra_pred_mode(h, h->chroma_pred_mode);
3444
            if(h->chroma_pred_mode < 0)
3445
                return -1;
3446
    }else if(partition_count==4){
3447
        int i, j, sub_partition_count[4], list, ref[2][4];
3448
        
3449
        if(h->slice_type == B_TYPE){
3450
            for(i=0; i<4; i++){
3451
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
3452
                if(h->sub_mb_type[i] >=13){
3453
                    av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
3454
                    return -1;
3455
                }
3456
                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
3457
                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
3458
            }
3459
        }else{
3460
            assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
3461
            for(i=0; i<4; i++){
3462
                h->sub_mb_type[i]= get_ue_golomb(&s->gb);
3463
                if(h->sub_mb_type[i] >=4){
3464
                    av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %d out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
3465
                    return -1;
3466
                }
3467
                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
3468
                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
3469
            }
3470
        }
3471
        
3472
        for(list=0; list<2; list++){
3473
            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
3474
            if(ref_count == 0) continue;
3475
            for(i=0; i<4; i++){
3476
                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
3477
                    ref[list][i] = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
3478
                }else{
3479
                 //FIXME
3480
                    ref[list][i] = -1;
3481
                }
3482
            }
3483
        }
3484
        
3485
        for(list=0; list<2; list++){
3486
            const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
3487
            if(ref_count == 0) continue;
3488

    
3489
            for(i=0; i<4; i++){
3490
                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
3491
                h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
3492

    
3493
                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
3494
                    const int sub_mb_type= h->sub_mb_type[i];
3495
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
3496
                    for(j=0; j<sub_partition_count[i]; j++){
3497
                        int mx, my;
3498
                        const int index= 4*i + block_width*j;
3499
                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
3500
                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
3501
                        mx += get_se_golomb(&s->gb);
3502
                        my += get_se_golomb(&s->gb);
3503
                        tprintf("final mv:%d %d\n", mx, my);
3504

    
3505
                        if(IS_SUB_8X8(sub_mb_type)){
3506
                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= 
3507
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
3508
                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= 
3509
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
3510
                        }else if(IS_SUB_8X4(sub_mb_type)){
3511
                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
3512
                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= my;
3513
                        }else if(IS_SUB_4X8(sub_mb_type)){
3514
                            mv_cache[ 0 ][0]= mv_cache[ 8 ][0]= mx;
3515
                            mv_cache[ 0 ][1]= mv_cache[ 8 ][1]= my;
3516
                        }else{
3517
                            assert(IS_SUB_4X4(sub_mb_type));
3518
                            mv_cache[ 0 ][0]= mx;
3519
                            mv_cache[ 0 ][1]= my;
3520
                        }
3521
                    }
3522
                }else{
3523
                    uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
3524
                    p[0] = p[1]=
3525
                    p[8] = p[9]= 0;
3526
                }
3527
            }
3528
        }
3529
    }else if(!IS_DIRECT(mb_type)){
3530
        int list, mx, my, i;
3531
         //FIXME we should set ref_idx_l? to 0 if we use that later ...
3532
        if(IS_16X16(mb_type)){
3533
            for(list=0; list<2; list++){
3534
                if(h->ref_count[0]>0){
3535
                    if(IS_DIR(mb_type, 0, list)){
3536
                        const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3537
                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
3538
                    }
3539
                }
3540
            }
3541
            for(list=0; list<2; list++){
3542
                if(IS_DIR(mb_type, 0, list)){
3543
                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
3544
                    mx += get_se_golomb(&s->gb);
3545
                    my += get_se_golomb(&s->gb);
3546
                    tprintf("final mv:%d %d\n", mx, my);
3547

    
3548
                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
3549
                }
3550
            }
3551
        }
3552
        else if(IS_16X8(mb_type)){
3553
            for(list=0; list<2; list++){
3554
                if(h->ref_count[list]>0){
3555
                    for(i=0; i<2; i++){
3556
                        if(IS_DIR(mb_type, i, list)){
3557
                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3558
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
3559
                        }
3560
                    }
3561
                }
3562
            }
3563
            for(list=0; list<2; list++){
3564
                for(i=0; i<2; i++){
3565
                    if(IS_DIR(mb_type, i, list)){
3566
                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
3567
                        mx += get_se_golomb(&s->gb);
3568
                        my += get_se_golomb(&s->gb);
3569
                        tprintf("final mv:%d %d\n", mx, my);
3570

    
3571
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
3572
                    }
3573
                }
3574
            }
3575
        }else{
3576
            assert(IS_8X16(mb_type));
3577
            for(list=0; list<2; list++){
3578
                if(h->ref_count[list]>0){
3579
                    for(i=0; i<2; i++){
3580
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
3581
                            const int val= get_te0_golomb(&s->gb, h->ref_count[list]);
3582
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
3583
                        }
3584
                    }
3585
                }
3586
            }
3587
            for(list=0; list<2; list++){
3588
                for(i=0; i<2; i++){
3589
                    if(IS_DIR(mb_type, i, list)){
3590
                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
3591
                        mx += get_se_golomb(&s->gb);
3592
                        my += get_se_golomb(&s->gb);
3593
                        tprintf("final mv:%d %d\n", mx, my);
3594

    
3595
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
3596
                    }
3597
                }
3598
            }
3599
        }
3600
    }
3601
    
3602
    if(IS_INTER(mb_type))
3603
        write_back_motion(h, mb_type);
3604
    
3605
    if(!IS_INTRA16x16(mb_type)){
3606
        cbp= get_ue_golomb(&s->gb);
3607
        if(cbp > 47){
3608
            av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
3609
            return -1;
3610
        }
3611
        
3612
        if(IS_INTRA4x4(mb_type))
3613
            cbp= golomb_to_intra4x4_cbp[cbp];
3614
        else
3615
            cbp= golomb_to_inter_cbp[cbp];
3616
    }
3617

    
3618
    if(cbp || IS_INTRA16x16(mb_type)){
3619
        int i8x8, i4x4, chroma_idx;
3620
        int chroma_qp, dquant;
3621
        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
3622
        const uint8_t *scan, *dc_scan;
3623
        
3624
//        fill_non_zero_count_cache(h);
3625

    
3626
        if(IS_INTERLACED(mb_type)){
3627
            scan= field_scan;
3628
            dc_scan= luma_dc_field_scan;
3629
        }else{
3630
            scan= zigzag_scan;
3631
            dc_scan= luma_dc_zigzag_scan;
3632
        }
3633

    
3634
        dquant= get_se_golomb(&s->gb);
3635

    
3636
        if( dquant > 25 || dquant < -26 ){
3637
            av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
3638
            return -1;
3639
        }
3640
        
3641
        s->qscale += dquant;
3642
        if(((unsigned)s->qscale) > 51){
3643
            if(s->qscale<0) s->qscale+= 52;
3644
            else            s->qscale-= 52;
3645
        }
3646
        
3647
        h->chroma_qp= chroma_qp= get_chroma_qp(h, s->qscale);
3648
        if(IS_INTRA16x16(mb_type)){
3649
            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, s->qscale, 16) < 0){
3650
                return -1; //FIXME continue if partotioned and other retirn -1 too
3651
            }
3652

    
3653
            assert((cbp&15) == 0 || (cbp&15) == 15);
3654

    
3655
            if(cbp&15){
3656
                for(i8x8=0; i8x8<4; i8x8++){
3657
                    for(i4x4=0; i4x4<4; i4x4++){
3658
                        const int index= i4x4 + 4*i8x8;
3659
                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, s->qscale, 15) < 0 ){
3660
                            return -1;
3661
                        }
3662
                    }
3663
                }
3664
            }else{
3665
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
3666
            }
3667
        }else{
3668
            for(i8x8=0; i8x8<4; i8x8++){
3669
                if(cbp & (1<<i8x8)){
3670
                    for(i4x4=0; i4x4<4; i4x4++){
3671
                        const int index= i4x4 + 4*i8x8;
3672
                        
3673
                        if( decode_residual(h, gb, h->mb + 16*index, index, scan, s->qscale, 16) <0 ){
3674
                            return -1;
3675
                        }
3676
                    }
3677
                }else{
3678
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
3679
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
3680
                }
3681
            }
3682
        }
3683
        
3684
        if(cbp&0x30){
3685
            for(chroma_idx=0; chroma_idx<2; chroma_idx++)
3686
                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, chroma_qp, 4) < 0){
3687
                    return -1;
3688
                }
3689
        }
3690

    
3691
        if(cbp&0x20){
3692
            for(chroma_idx=0; chroma_idx<2; chroma_idx++){
3693
                for(i4x4=0; i4x4<4; i4x4++){
3694
                    const int index= 16 + 4*chroma_idx + i4x4;
3695
                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, chroma_qp, 15) < 0){
3696
                        return -1;
3697
                    }
3698
                }
3699
            }
3700
        }else{
3701
            uint8_t * const nnz= &h->non_zero_count_cache[0];
3702
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
3703
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
3704
        }
3705
    }else{
3706
        uint8_t * const nnz= &h->non_zero_count_cache[0];
3707
        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
3708
        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
3709
        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
3710
    }
3711
    s->current_picture.qscale_table[mb_xy]= s->qscale;
3712
    write_back_non_zero_count(h);
3713

    
3714
    return 0;
3715
}
3716

    
3717
/**
 * decodes the mb_type syntax element with CABAC.
 *
 * Only I and P slices are handled. In I slices the result is 0 (I4x4),
 * 25 (PCM) or 1..24 (I16x16 variants). In P slices the result is 0..3 for
 * the inter types and 5 + the intra value for intra macroblocks.
 *
 * @returns the decoded mb_type, or -1 for any other slice type
 */
static int decode_cabac_mb_type( H264Context *h ) {
    MpegEncContext * const s = &h->s;
    int type;

    if( h->slice_type == I_TYPE ) {
        const int cur_xy = s->mb_x + s->mb_y*s->mb_stride;
        int inc = 0;

        /* each available neighbour (left, top) that is not I4x4 raises the context */
        if( s->mb_x > 0 )
            inc += !IS_INTRA4x4( s->current_picture.mb_type[cur_xy-1] );
        if( s->mb_y > 0 )
            inc += !IS_INTRA4x4( s->current_picture.mb_type[cur_xy-s->mb_stride] );

        if( !get_cabac( &h->cabac, &h->cabac_state[3+inc] ) )
            return 0;   /* I4x4 */

        if( get_cabac_terminate( &h->cabac ) )
            return 25;  /* PCM */

        type = 1;       /* I16x16 */
        if( get_cabac( &h->cabac, &h->cabac_state[3+3] ) )
            type += 12; /* cbp_luma != 0 */

        if( get_cabac( &h->cabac, &h->cabac_state[3+4] ) ) {
            /* cbp_chroma is 2 if the second bin is set, 1 otherwise */
            type += get_cabac( &h->cabac, &h->cabac_state[3+5] ) ? 4 * 2 : 4 * 1;
        }
        if( get_cabac( &h->cabac, &h->cabac_state[3+6] ) )
            type += 2;
        if( get_cabac( &h->cabac, &h->cabac_state[3+7] ) )
            type += 1;
        return type;
    }

    if( h->slice_type != P_TYPE ) {
        /* TODO do others frames types */
        return -1;
    }

    if( !get_cabac( &h->cabac, &h->cabac_state[14] ) ) {
        /* P-type */
        if( !get_cabac( &h->cabac, &h->cabac_state[15] ) ) {
            /* 0: P_L0_D16x16, 3: P_8x8 */
            return get_cabac( &h->cabac, &h->cabac_state[16] ) ? 3 : 0;
        }
        /* 1: P_L0_D16x8, 2: P_L0_D8x16 */
        return get_cabac( &h->cabac, &h->cabac_state[17] ) ? 2 : 1;
    }

    /* I-type inside a P slice, values are offset by 5 */
    if( !get_cabac( &h->cabac, &h->cabac_state[17] ) )
        return 5+0;     /* I_4x4 */
    if( get_cabac_terminate( &h->cabac ) )
        return 5+25;    /* I_PCM */

    type = 5+1;         /* I16x16 */
    if( get_cabac( &h->cabac, &h->cabac_state[17+1] ) )
        type += 12;     /* cbp_luma != 0 */

    if( get_cabac( &h->cabac, &h->cabac_state[17+2] ) ) {
        /* cbp_chroma is 2 if the second bin is set, 1 otherwise */
        type += get_cabac( &h->cabac, &h->cabac_state[17+2] ) ? 4 * 2 : 4 * 1;
    }
    if( get_cabac( &h->cabac, &h->cabac_state[17+3] ) )
        type += 2;
    if( get_cabac( &h->cabac, &h->cabac_state[17+3] ) )
        type += 1;

    return type;
}
3795

    
3796
/**
 * decodes the macroblock skip flag with CABAC.
 *
 * The context is raised once for each available neighbour (left, top)
 * that is not a skipped macroblock. P/SP slices use the states starting
 * at 11, all other slice types the states starting at 24.
 *
 * @returns the decoded bin as returned by get_cabac()
 */
static int decode_cabac_mb_skip( H264Context *h) {
    MpegEncContext * const s = &h->s;
    const int cur_xy = s->mb_x + s->mb_y*s->mb_stride;
    const int is_p   = h->slice_type == P_TYPE || h->slice_type == SP_TYPE;
    int inc = 0;

    if( s->mb_x > 0 )
        inc += !IS_SKIP( s->current_picture.mb_type[cur_xy - 1] );
    if( s->mb_y > 0 )
        inc += !IS_SKIP( s->current_picture.mb_type[cur_xy - s->mb_stride] );

    /* 11: P/SP slices, 24: B-frame */
    return get_cabac( &h->cabac, &h->cabac_state[(is_p ? 11 : 24) + inc] );
}
3813

    
3814
/**
 * decodes an intra 4x4 prediction mode with CABAC.
 *
 * A first bin (state 68) selects the predicted mode. Otherwise a 3 bit
 * value is read least significant bit first (state 69); values greater
 * than or equal to the predicted mode are shifted up by one so that the
 * predicted mode itself is never produced.
 *
 * @param pred_mode the predicted mode
 * @returns the decoded prediction mode
 */
static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
    int rem = 0;
    int bit;

    if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
        return pred_mode;

    for( bit = 0; bit < 3; bit++ ) {
        if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
            rem |= 1 << bit;
    }

    return rem >= pred_mode ? rem + 1 : rem;
}
3831

    
3832
/**
 * decodes the intra chroma prediction mode with CABAC.
 *
 * The context of the first bin is raised once for each available
 * neighbour (left, top) that is an intra 4x4 or intra 16x16 macroblock
 * with a non zero entry in chroma_pred_mode_table.
 *
 * @returns the chroma prediction mode (0..3)
 */
static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
    MpegEncContext * const s = &h->s;
    const int cur_xy  = s->mb_x + s->mb_y*s->mb_stride;
    const int left_xy = cur_xy - 1;
    const int top_xy  = cur_xy - s->mb_stride;
    int inc = 0;

    if( s->mb_x > 0 ) {
        if( IS_INTRA4x4( s->current_picture.mb_type[left_xy] ) || IS_INTRA16x16( s->current_picture.mb_type[left_xy] ) ) {
            if( h->chroma_pred_mode_table[left_xy] != 0 )
                inc++;
        }
    }
    if( s->mb_y > 0 ) {
        if( IS_INTRA4x4( s->current_picture.mb_type[top_xy] ) || IS_INTRA16x16( s->current_picture.mb_type[top_xy] ) ) {
            if( h->chroma_pred_mode_table[top_xy] != 0 )
                inc++;
        }
    }

    /* the mode is the number of set bins, at most 3 bins are read */
    if( !get_cabac( &h->cabac, &h->cabac_state[64+inc] ) )
        return 0;
    if( !get_cabac( &h->cabac, &h->cabac_state[64+3] ) )
        return 1;
    return get_cabac( &h->cabac, &h->cabac_state[64+3] ) ? 3 : 2;
}
3861

    
3862
/**
 * x coordinate (0..3) of each of the 16 blocks, indexed by block index.
 * Used as the first index of block_idx_xy.
 */
static const uint8_t block_idx_x[16] = {
    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
};

/**
 * y coordinate (0..3) of each of the 16 blocks, indexed by block index.
 * Used as the second index of block_idx_xy.
 */
static const uint8_t block_idx_y[16] = {
    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
};

/**
 * inverse of block_idx_x / block_idx_y:
 * block_idx_xy[ block_idx_x[i] ][ block_idx_y[i] ] == i.
 * Dividing an entry by 4 gives the index of the 8x8 group containing it.
 */
static const uint8_t block_idx_xy[4][4] = {
    { 0, 2, 8,  10},
    { 1, 3, 9,  11},
    { 4, 6, 12, 14},
    { 5, 7, 13, 15}
};
3874

    
3875
/**
 * decodes the luma part of the coded block pattern with CABAC.
 *
 * One bin is read per 8x8 block. Its context depends on the 8x8 blocks to
 * the left and above, which lie either in the current macroblock or in
 * the left / top neighbour. h->cbp_table[mb_xy] is updated while decoding
 * so that the blocks decoded so far can serve as neighbours of later ones.
 *
 * @returns the 4 bit luma cbp
 */
static int decode_cabac_mb_cbp_luma( H264Context *h) {
    MpegEncContext * const s = &h->s;
    const int cur_xy = s->mb_x + s->mb_y*s->mb_stride;
    int result = 0;
    int blk;

    h->cbp_table[cur_xy] = 0;  /* FIXME aaahahahah beurk */

    for( blk = 0; blk < 4; blk++ ) {
        const int x = block_idx_x[4*blk];
        const int y = block_idx_y[4*blk];
        /* macroblock holding the left / top neighbour block, -1 if there is none */
        const int left_xy = x > 0 ? cur_xy : ( s->mb_x > 0 ? cur_xy - 1            : -1 );
        const int top_xy  = y > 0 ? cur_xy : ( s->mb_y > 0 ? cur_xy - s->mb_stride : -1 );
        int inc = 0;

        if( left_xy >= 0 ) {
            const int left8 = block_idx_xy[(x-1)&0x03][y]/4;
            /* a skipped neighbour or a neighbour block without coefficients raises the context */
            if( IS_SKIP( s->current_picture.mb_type[left_xy] ) || ((h->cbp_table[left_xy] >> left8)&0x01) == 0 )
                inc += 1;
        }

        if( top_xy >= 0 ) {
            const int top8 = block_idx_xy[x][(y-1)&0x03]/4;
            if( IS_SKIP( s->current_picture.mb_type[top_xy] ) || ((h->cbp_table[top_xy] >> top8)&0x01) == 0 )
                inc += 2;
        }

        if( get_cabac( &h->cabac, &h->cabac_state[73 + inc] ) ) {
            result |= 1 << blk;
            h->cbp_table[cur_xy] = result;  /* FIXME aaahahahah beurk */
        }
    }
    return result;
}
3922
/**
 * decodes the chroma part of the coded block pattern with CABAC.
 *
 * The contexts depend on the chroma cbp (bits 4 and 5 of cbp_table) of the
 * left and top macroblocks; a missing or skipped neighbour counts as -1.
 *
 * @returns the chroma cbp (0, 1 or 2)
 */
static int decode_cabac_mb_cbp_chroma( H264Context *h) {
    MpegEncContext * const s = &h->s;
    const int cur_xy = s->mb_x + s->mb_y*s->mb_stride;
    int left_cbp = -1;
    int top_cbp  = -1;
    int inc;

    if( s->mb_x > 0 && !IS_SKIP( s->current_picture.mb_type[cur_xy-1] ) )
        left_cbp = (h->cbp_table[cur_xy-1]>>4)&0x03;

    if( s->mb_y > 0 && !IS_SKIP( s->current_picture.mb_type[cur_xy-s->mb_stride] ) )
        top_cbp = (h->cbp_table[cur_xy-s->mb_stride]>>4)&0x03;

    /* first bin: is there any chroma coefficient at all */
    inc = 0;
    if( left_cbp > 0 ) inc += 1;
    if( top_cbp  > 0 ) inc += 2;
    if( !get_cabac( &h->cabac, &h->cabac_state[77 + inc] ) )
        return 0;

    /* second bin: selects between 1 and 2 */
    inc = 4;
    if( left_cbp == 2 ) inc += 1;
    if( top_cbp  == 2 ) inc += 2;
    return get_cabac( &h->cabac, &h->cabac_state[77 + inc] ) ? 2 : 1;
}
3952
/**
 * decodes the quantizer delta with CABAC.
 *
 * Bins are counted until a zero bin is read. The context of the first bin
 * depends on the previous macroblock (the left one, or the last one of the
 * row above when mb_x is 0) and on h->last_qscale_diff. Odd counts map to
 * positive deltas, even counts to zero or negative deltas.
 *
 * @returns the signed quantizer delta
 */
static int decode_cabac_mb_dqp( H264Context *h) {
    MpegEncContext * const s = &h->s;
    const int prev_xy = s->mb_x > 0 ? s->mb_x + s->mb_y*s->mb_stride - 1
                                    : s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
    int inc   = 0;
    int count;

    if( prev_xy >= 0 && h->last_qscale_diff != 0 && ( IS_INTRA16x16(s->current_picture.mb_type[prev_xy] ) || (h->cbp_table[prev_xy]&0x3f) ) )
        inc++;

    /* the second bin uses context 2, all further bins context 3 */
    for( count = 0; get_cabac( &h->cabac, &h->cabac_state[60 + inc] ); count++ )
        inc = inc < 2 ? 2 : 3;

    return (count&0x01) ? (count + 1)/2 : -(count + 1)/2;
}
3979

    
3980
/**
 * Computes the context index of the coded block flag of one residual block.
 * @param cat block category: 0 luma DC of intra 16x16, 1 luma AC of
 *            intra 16x16, 2 luma 4x4, 3 chroma DC, 4 chroma AC
 * @param idx block index inside the macroblock (meaning depends on cat,
 *            see decode_cabac_residual)
 * @returns 4*cat + (1 if the left block counts as coded)
 *                + (2 if the top block counts as coded)
 */
static int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
    MpegEncContext * const s = &h->s;
    const int cur_xy   = s->mb_x + s->mb_y*s->mb_stride;
    const int has_left = s->mb_x > 0;
    const int has_top  = s->mb_y > 0;

    /* macroblock containing the left / top block, -1 if not available */
    int left_xy = -1;
    int top_xy  = -1;
    /* coded state of the left / top block, values > 0 mean coded */
    int left_nz = -1;
    int top_nz  = -1;
    int inc = 0;

    switch( cat ) {
    case 0:
        /* only intra 16x16 neighbours have a luma DC block, its flag is bit 8 of cbp_table */
        if( has_left ) {
            left_xy = cur_xy - 1;
            if( IS_INTRA16x16( s->current_picture.mb_type[left_xy] ) )
                left_nz = h->cbp_table[left_xy]&0x100;
        }
        if( has_top ) {
            top_xy = cur_xy - s->mb_stride;
            if( IS_INTRA16x16( s->current_picture.mb_type[top_xy] ) )
                top_nz = h->cbp_table[top_xy]&0x100;
        }
        break;

    case 1:
    case 2: {
        const int x = block_idx_x[idx];
        const int y = block_idx_y[idx];

        /* the neighbouring block lies in the current macroblock unless
         * this block touches the left / top macroblock border */
        if( x > 0 )
            left_xy = cur_xy;
        else if( has_left )
            left_xy = cur_xy - 1;

        if( y > 0 )
            top_xy = cur_xy;
        else if( has_top )
            top_xy = cur_xy - s->mb_stride;

        if( left_xy >= 0 ) {
            const int i8x8 = block_idx_xy[(x-1)&0x03][y]/4;

            if( !IS_SKIP( s->current_picture.mb_type[left_xy] ) &&
                !IS_INTRA_PCM( s->current_picture.mb_type[left_xy] ) &&
                ((h->cbp_table[left_xy]&0x0f)>>i8x8) )
                left_nz = h->non_zero_count_cache[scan8[idx] - 1];
        }

        if( top_xy >= 0 ) {
            const int i8x8 = block_idx_xy[x][(y-1)&0x03]/4;

            if( !IS_SKIP( s->current_picture.mb_type[top_xy] ) &&
                !IS_INTRA_PCM( s->current_picture.mb_type[top_xy] ) &&
                ((h->cbp_table[top_xy]&0x0f)>>i8x8) )
                top_nz = h->non_zero_count_cache[scan8[idx] - 8];
        }
        break;
    }

    case 3:
        /* chroma DC flags are kept in bits 6 and 7 of cbp_table */
        if( has_left ) {
            left_xy = cur_xy - 1;

            if( !IS_SKIP( s->current_picture.mb_type[left_xy] ) &&
                !IS_INTRA_PCM( s->current_picture.mb_type[left_xy] ) &&
                (h->cbp_table[left_xy]&0x30) )
                left_nz = (h->cbp_table[left_xy]>>(6+idx))&0x01;
        }
        if( has_top ) {
            top_xy = cur_xy - s->mb_stride;

            if( !IS_SKIP( s->current_picture.mb_type[top_xy] ) &&
                !IS_INTRA_PCM( s->current_picture.mb_type[top_xy] ) &&
                (h->cbp_table[top_xy]&0x30) )
                top_nz = (h->cbp_table[top_xy]>>(6+idx))&0x01;
        }
        break;

    case 4: {
        /* position of the block inside its 2x2 chroma plane */
        const int idxc = idx % 4;

        if( idxc == 1 || idxc == 3 )
            left_xy = cur_xy;
        else if( has_left )
            left_xy = cur_xy - 1;

        if( idxc == 2 || idxc == 3 )
            top_xy = cur_xy;
        else if( has_top )
            top_xy = cur_xy - s->mb_stride;

        if( left_xy >= 0 &&
            !IS_SKIP( s->current_picture.mb_type[left_xy] ) &&
            !IS_INTRA_PCM( s->current_picture.mb_type[left_xy] ) &&
            (h->cbp_table[left_xy]&0x30) == 0x20 )
            left_nz = h->non_zero_count_cache[scan8[16+idx] - 1];

        if( top_xy >= 0 &&
            !IS_SKIP( s->current_picture.mb_type[top_xy] ) &&
            !IS_INTRA_PCM( s->current_picture.mb_type[top_xy] ) &&
            (h->cbp_table[top_xy]&0x30) == 0x20 )
            top_nz = h->non_zero_count_cache[scan8[16+idx] - 8];
        break;
    }
    }

    /* a missing neighbour counts as coded if the current macroblock is
     * intra, an available one always counts as coded if it is intra PCM */
    if( left_nz > 0 ||
        ( left_xy < 0 ? IS_INTRA( s->current_picture.mb_type[cur_xy] )
                      : IS_INTRA_PCM( s->current_picture.mb_type[left_xy] ) ) )
        inc += 1;

    if( top_nz > 0 ||
        ( top_xy < 0 ? IS_INTRA( s->current_picture.mb_type[cur_xy] )
                     : IS_INTRA_PCM( s->current_picture.mb_type[top_xy] ) ) )
        inc += 2;

    return inc + 4 * cat;
}
4089

    
4090
/**
 * Decodes one residual block with CABAC and stores its coefficients in block.
 * @param block destination; written at the positions given by scantable
 * @param cat block category:
 *            0-> DC 16x16  n = 0
 *            1-> AC 16x16  n = luma4x4idx
 *            2-> Luma4x4   n = luma4x4idx
 *            3-> DC Chroma n = iCbCr
 *            4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
 * @param n block index, see cat
 * @param scantable maps the coding order to the position inside block
 * @param qp quantizer used to select the dequantization table; only the
 *           AC categories (1, 2, 4) are dequantized here
 * @param max_coeff number of coefficients of the block (at most 16)
 * @returns 0 if ok, -1 if the escape code of a coefficient is too long,
 *          which can only happen with a damaged stream
 */
static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int qp, int max_coeff) {
    const int mb_xy  = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
    const uint16_t *qmul= dequant_coeff[qp];
    static const int significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 };
    static const int last_significant_coeff_flag_offset[5] = { 0, 15, 29, 44, 47 };
    static const int coeff_abs_level_m1_offset[5] = { 0, 10, 20, 30, 39 };
    const int is_dc = ( cat == 0 || cat == 3 );

    int coeff[16];      /* decoded levels, in coding order */
    int nz[16] = {0};   /* significance map, in coding order */

    int last = 0;
    int coeff_count = 0;
    int i, j;

    int abslevel1 = 0;   /* number of levels decoded so far with |level| == 1 */
    int abslevelgt1 = 0; /* number of levels decoded so far with |level| >  1 */

    /* read coded block flag */
    if( get_cabac( &h->cabac, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
        if( cat == 1 || cat == 2 )
            h->non_zero_count_cache[scan8[n]] = 0;
        else if( cat == 4 )
            h->non_zero_count_cache[scan8[16+n]] = 0;

        return 0;
    }

    /* read the significance map */
    while( last < max_coeff - 1 ) {
        const int ctx = FFMIN( last, max_coeff - 2 );

        if( get_cabac( &h->cabac, &h->cabac_state[105+significant_coeff_flag_offset[cat]+ctx] ) == 0 ) {
            nz[last++] = 0;
            continue;
        }

        nz[last++] = 1;
        coeff_count++;
        if( get_cabac( &h->cabac, &h->cabac_state[166+last_significant_coeff_flag_offset[cat]+ctx] ) ) {
            /* that was the last significant coefficient */
            while( last < max_coeff )
                nz[last++] = 0;
            break;
        }
    }
    /* no "last" flag was seen, so the final coefficient is significant without being coded */
    if( last == max_coeff - 1 ) {
        nz[last++] = 1;
        coeff_count++;
    }

    /* remember what was coded for the context selection of later blocks */
    if( cat == 0 && coeff_count > 0 )
        h->cbp_table[mb_xy] |= 0x100;
    else if( cat == 1 || cat == 2 )
        h->non_zero_count_cache[scan8[n]] = coeff_count;
    else if( cat == 3 && coeff_count > 0 )
        h->cbp_table[mb_xy] |= 0x40 << n;
    else if( cat == 4 )
        h->non_zero_count_cache[scan8[16+n]] = coeff_count;

    /* read the levels, the last significant coefficient comes first */
    for( i = coeff_count - 1; i >= 0; i-- ) {
        int coeff_abs_m1;
        int ctx = (abslevelgt1 != 0 ? 0 : FFMIN( 4, abslevel1 + 1 )) + coeff_abs_level_m1_offset[cat];

        if( get_cabac( &h->cabac, &h->cabac_state[227+ctx] ) == 0 ) {
            coeff_abs_m1 = 0;
        } else {
            coeff_abs_m1 = 1;
            ctx = 5 + FFMIN( 4, abslevelgt1 ) + coeff_abs_level_m1_offset[cat];
            while( coeff_abs_m1 < 14 && get_cabac( &h->cabac, &h->cabac_state[227+ctx] ) ) {
                coeff_abs_m1++;
            }
        }

        if( coeff_abs_m1 >= 14 ) {
            /* escape: a run of "1" bypass bins, a "0" and then as many suffix bits */
            int prefix = 0;
            while( get_cabac_bypass( &h->cabac ) ) {
                /* bound the prefix so that the shift stays defined and the sum cannot overflow */
                if( prefix >= 23 ) {
                    av_log( h->s.avctx, AV_LOG_ERROR, "cabac coefficient escape overflow at %d %d\n", h->s.mb_x, h->s.mb_y );
                    return -1;
                }
                coeff_abs_m1 += 1 << prefix;
                prefix++;
            }

            while( prefix-- ) {
                if( get_cabac_bypass( &h->cabac ) )
                    coeff_abs_m1 += 1 << prefix;
            }
        }

        /* sign */
        if( get_cabac_bypass( &h->cabac ) )
            coeff[i] = -1 *( coeff_abs_m1 + 1 );
        else
            coeff[i] = coeff_abs_m1 + 1;

        if( coeff_abs_m1 == 0 )
            abslevel1++;
        else
            abslevelgt1++;
    }

    /* scatter the levels to their positions; DC blocks are stored without dequantization */
    for( i = 0, j = 0; j < coeff_count; i++ ) {
        if( !nz[i] )
            continue;

        if( is_dc )
            block[scantable[i]] = coeff[j];
        else
            block[scantable[i]] = coeff[j] * qmul[scantable[i]];
        j++;
    }
    return 0;
}
4215

    
4216
/**
4217
 * decodes a macroblock
4218
 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4219
 */
4220
static int decode_mb_cabac(H264Context *h) {
4221
    MpegEncContext * const s = &h->s;
4222
    const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
4223
    int mb_type, partition_count, cbp = 0;
4224

    
4225
    s->dsp.clear_blocks(h->mb); //FIXME avoid if allready clear (move after skip handlong?)
4226

    
4227
    if( h->slice_type == B_TYPE ) {
4228
        av_log( h->s.avctx, AV_LOG_ERROR, "B-frame not supported with CABAC\n" );
4229
        return -1;
4230
    }
4231
    if( h->sps.mb_aff ) {
4232
        av_log( h->s.avctx, AV_LOG_ERROR, "Fields not supported with CABAC\n" );
4233
        return -1;
4234
    }
4235

    
4236
    if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
4237
        /* read skip flags */
4238
        if( decode_cabac_mb_skip( h ) ) {
4239
            int mx, my;
4240

    
4241
            /* skip mb */
4242
            mb_type= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4243

    
4244
            memset(h->non_zero_count[mb_xy], 0, 16);
4245
            memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4246
#if 0
4247
            if(h->sps.mb_aff && s->mb_skip_run==0 && (s->mb_y&1)==0){
4248
                h->mb_field_decoding_flag= get_bits1(&s->gb);
4249
            }
4250
            if(h->mb_field_decoding_flag)
4251
                mb_type|= MB_TYPE_INTERLACED;
4252
#endif
4253

    
4254
            fill_caches(h, mb_type); //FIXME check what is needed and what not ...
4255
            pred_pskip_motion(h, &mx, &my);
4256
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4257
            fill_rectangle(  h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4258
            write_back_motion(h, mb_type);
4259

    
4260
            s->current_picture.mb_type[mb_xy]= mb_type; //FIXME SKIP type
4261
            s->current_picture.qscale_table[mb_xy]= s->qscale;
4262
            h->slice_table[ mb_xy ]= h->slice_num;
4263
            h->cbp_table[mb_xy] = 0;
4264
            h->last_qscale_diff = 0;
4265

    
4266
            h->prev_mb_skiped= 1;
4267

    
4268
            return 0;
4269

    
4270
        }
4271
    }
4272
    h->prev_mb_skiped = 0;
4273

    
4274
    if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
4275
        av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
4276
        return -1;
4277
    }
4278
    //av_log( s->avctx, AV_LOG_ERROR, "mb_type=%d\n", mb_type );
4279

    
4280
    if( h->slice_type == P_TYPE ) {
4281
        if( mb_type < 5) {
4282
            partition_count= p_mb_type_info[mb_type].partition_count;
4283
            mb_type=         p_mb_type_info[mb_type].type;
4284
            av_log( h->s.avctx, AV_LOG_ERROR, "gni P-type not yet supported\n" );
4285
            return -1;
4286
        } else {
4287
            mb_type -= 5;
4288
            goto decode_intra_mb;
4289
        }
4290
    } else {
4291
       assert(h->slice_type == I_TYPE);
4292
decode_intra_mb:
4293
        partition_count = 0;
4294
        cbp= i_mb_type_info[mb_type].cbp;
4295
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4296
        mb_type= i_mb_type_info[mb_type].type;
4297
    }
4298
#if 0
4299
    if(h->mb_field_decoding_flag)
4300
        mb_type |= MB_TYPE_INTERLACED;
4301
#endif
4302

    
4303
    s->current_picture.mb_type[mb_xy]= mb_type;
4304
    h->slice_table[ mb_xy ]= h->slice_num;
4305

    
4306
    if(IS_INTRA_PCM(mb_type)) {
4307
        /* TODO */
4308
        h->cbp_table[mb_xy] = 0xf +4*2;
4309
        s->current_picture.qscale_table[mb_xy]= s->qscale;
4310
        return -1;
4311
    }
4312

    
4313
    fill_caches(h, mb_type);
4314

    
4315
    if( IS_INTRA( mb_type ) ) {
4316
        if( IS_INTRA4x4( mb_type ) ) {
4317
            int i;
4318
            for( i = 0; i < 16; i++ ) {
4319
                int pred = pred_intra_mode( h, i );
4320
                h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
4321

    
4322
                //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
4323
            }
4324
            write_back_intra_pred_mode(h);
4325
            if( check_intra4x4_pred_mode(h) < 0 ) return -1;
4326
        } else {
4327
            h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
4328
            if( h->intra16x16_pred_mode < 0 ) return -1;
4329
        }
4330
        h->chroma_pred_mode_table[mb_xy] =
4331
            h->chroma_pred_mode          = decode_cabac_mb_chroma_pre_mode( h );
4332

    
4333
        h->chroma_pred_mode= check_intra_pred_mode( h, h->chroma_pred_mode );
4334
        if( h->chroma_pred_mode < 0 ) return -1;
4335
    } else if( partition_count == 4 ) {
4336
        /* TODO */
4337
        return -1;
4338
    } else if( !IS_DIRECT(mb_type) ) {
4339
        /* TODO */
4340
        return -1;
4341
    }
4342

    
4343
   if( IS_INTER( mb_type ) )
4344
        write_back_motion( h, mb_type );
4345

    
4346
    if( !IS_INTRA16x16( mb_type ) ) {
4347
        cbp  = decode_cabac_mb_cbp_luma( h );
4348
        cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
4349
    }
4350

    
4351
    //av_log( NULL, AV_LOG_ERROR, "cbp=%d\n", cbp );
4352
    h->cbp_table[mb_xy] = cbp;
4353

    
4354
    if( cbp || IS_INTRA16x16( mb_type ) ) {
4355
        const uint8_t *scan, *dc_scan;
4356
        int dqp;
4357

    
4358
        if(IS_INTERLACED(mb_type)){
4359
            scan= field_scan;
4360
            dc_scan= luma_dc_field_scan;
4361
        }else{
4362
            scan= zigzag_scan;
4363
            dc_scan= luma_dc_zigzag_scan;
4364
        }
4365

    
4366
        h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
4367
        s->qscale += dqp;
4368
        if(((unsigned)s->qscale) > 51){
4369
            if(s->qscale<0) s->qscale+= 52;
4370
            else            s->qscale-= 52;
4371
        }
4372
        h->chroma_qp = get_chroma_qp(h, s->qscale);
4373

    
4374
        if( IS_INTRA16x16( mb_type ) ) {
4375
            int i;
4376
            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
4377
            if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, s->qscale, 16) < 0)
4378
                return -1;
4379
            if( cbp&15 ) {
4380
                for( i = 0; i < 16; i++ ) {
4381
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
4382
                    if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, s->qscale, 15) < 0 )
4383
                        return -1;
4384
                }
4385
            } else {
4386
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4387
            }
4388
        } else {
4389
            int i8x8, i4x4;
4390
            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
4391
                if( cbp & (1<<i8x8) ) {
4392
                    for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
4393
                        const int index = 4*i8x8 + i4x4;
4394
                        //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
4395
                        if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, s->qscale, 16) < 0 )
4396
                            return -1;
4397
                    }
4398
                } else {
4399
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4400
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4401
                }
4402
            }
4403
        }
4404

    
4405
        if( cbp&0x30 ){
4406
            int c;
4407
            for( c = 0; c < 2; c++ ) {
4408
                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
4409
                if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, h->chroma_qp, 4) < 0)
4410
                    return -1;
4411
            }
4412
        }
4413

    
4414
        if( cbp&0x20 ) {
4415
            int c, i;
4416
            for( c = 0; c < 2; c++ ) {
4417
                for( i = 0; i < 4; i++ ) {
4418
                    const int index = 16 + 4 * c + i;
4419
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
4420
                    if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->chroma_qp, 15) < 0)
4421
                        return -1;
4422
                }
4423
            }
4424
        } else {
4425
            uint8_t * const nnz= &h->non_zero_count_cache[0];
4426
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4427
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4428
        }
4429
    } else {
4430
        memset( &h->non_zero_count_cache[8], 0, 8*5 );
4431
    }
4432

    
4433
    s->current_picture.qscale_table[mb_xy]= s->qscale;
4434
    write_back_non_zero_count(h);
4435

    
4436
    return 0;
4437
}
4438

    
4439

    
4440
/**
 * Deblocks one vertical luma edge of 16 rows.
 * @param pix first row of the edge, pix[0] is the sample right of it (q0)
 *            and pix[-1] the one left of it (p0)
 * @param stride distance in bytes between two rows
 * @param bS filter strength for each group of 4 rows; 0 leaves the rows
 *           untouched, 1-3 select the clipped filter and any larger value
 *           the strong filter
 * @param qp quantizer used to look up the alpha / beta / tc0 thresholds
 */
static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
    const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
    const int alpha = alpha_table[index_a];
    const int beta  = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
    int seg;

    for( seg = 0; seg < 4; seg++, pix += 4 * stride ) {
        const int strength = bS[seg];
        uint8_t *row = pix;
        int r;

        if( strength == 0 )
            continue;

        if( strength < 4 ) {
            const int tc0 = tc0_table[index_a][strength - 1];

            /* 4px edge length */
            for( r = 0; r < 4; r++, row += stride ) {
                const int p2 = row[-3];
                const int p1 = row[-2];
                const int p0 = row[-1];
                const int q0 = row[0];
                const int q1 = row[1];
                const int q2 = row[2];
                int tc = tc0;
                int delta;

                if( ABS( p0 - q0 ) >= alpha ||
                    ABS( p1 - p0 ) >= beta ||
                    ABS( q1 - q0 ) >= beta )
                    continue;

                /* every side which is smooth enough gets p1 / q1 filtered
                 * too and widens the clipping range of p0 / q0 by one */
                if( ABS( p2 - p0 ) < beta ) {
                    row[-2] = p1 + clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
                    tc++;
                }
                if( ABS( q2 - q0 ) < beta ) {
                    row[1] = q1 + clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
                    tc++;
                }

                delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                row[-1] = clip_uint8( p0 + delta );    /* p0' */
                row[0]  = clip_uint8( q0 - delta );    /* q0' */
            }
        } else {
            /* 4px edge length */
            for( r = 0; r < 4; r++, row += stride ) {
                const int p2 = row[-3];
                const int p1 = row[-2];
                const int p0 = row[-1];
                const int q0 = row[0];
                const int q1 = row[1];
                const int q2 = row[2];
                /* results of the short filter which only changes p0 / q0 */
                const int weak_p0 = ( 2*p1 + p0 + q1 + 2 ) >> 2;
                const int weak_q0 = ( 2*q1 + q0 + p1 + 2 ) >> 2;

                if( ABS( p0 - q0 ) >= alpha ||
                    ABS( p1 - p0 ) >= beta ||
                    ABS( q1 - q0 ) >= beta )
                    continue;

                if( ABS( p0 - q0 ) >= (( alpha >> 2 ) + 2 ) ) {
                    /* p0', q0' */
                    row[-1] = weak_p0;
                    row[ 0] = weak_q0;
                    continue;
                }

                if( ABS( p2 - p0 ) < beta ) {
                    const int p3 = row[-4];
                    /* p0', p1', p2' */
                    row[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
                    row[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
                    row[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
                } else {
                    /* p0' */
                    row[-1] = weak_p0;
                }

                if( ABS( q2 - q0 ) < beta ) {
                    const int q3 = row[3];
                    /* q0', q1', q2' */
                    row[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
                    row[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
                    row[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
                } else {
                    /* q0' */
                    row[0] = weak_q0;
                }
            }
        }
    }
}
4533
static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int bS[4], int qp ) {
4534
    int i, d;
4535
    const int index_a = clip( qp + h->slice_alpha_c0_offset, 0, 51 );
4536
    const int alpha = alpha_table[index_a];
4537
    const int beta  = beta_table[clip( qp + h->slice_beta_offset, 0, 51 )];
4538

    
4539
    for( i = 0; i < 4; i++ ) {
4540
        if( bS[i] == 0 ) {
4541
            pix += 2 * stride;
4542
            continue;
4543
        }
4544

    
4545
        if( bS[i] < 4 ) {
4546
            const int tc = tc0_table[index_a][bS[i] - 1] + 1;
4547
            /* 2px edge length (because we use same bS than the one for luma) */
4548
            for( d = 0; d < 2; d++ ){
4549
                const int p0 = pix[-1];
4550
                const int p1 = pix[-2];
4551
                const int q0 = pix[0];
4552
                const int q1 = pix[1];
4553

    
4554
                if( ABS( p0 - q0 ) < alpha &&
4555
                    ABS( p1 - p0 ) < beta &&
4556
                    ABS( q1 - q0 ) < beta ) {
4557
                    const int i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
4558

    
4559
                    pix[-1] = clip_uint8( p0 + i_delta );    /* p0' */
4560
                    pix[0]  = clip_uint8( q0 - i_delta );    /* q0' */
4561
                }
4562
                pix += stride;
4563
            }
4564
        }else{