Statistics
| Branch: | Revision:

ffmpeg / libavcodec / h264.c @ 6f3c50f2

History | View | Annotate | Download (306 KB)

1 0da71265 Michael Niedermayer
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8 0da71265 Michael Niedermayer
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
11 0da71265 Michael Niedermayer
 *
12 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
13 0da71265 Michael Niedermayer
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
19 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 0da71265 Michael Niedermayer
 */
21 115329f1 Diego Biurrun
22 0da71265 Michael Niedermayer
/**
23
 * @file h264.c
24
 * H.264 / AVC / MPEG4 part10 codec.
25
 * @author Michael Niedermayer <michaelni@gmx.at>
26
 */
27
28
#include "dsputil.h"
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31 26b4fe82 Aurelien Jacobs
#include "h264.h"
32 0da71265 Michael Niedermayer
#include "h264data.h"
33 26b4fe82 Aurelien Jacobs
#include "h264_parser.h"
34 0da71265 Michael Niedermayer
#include "golomb.h"
35 626464fb Kostya Shishkov
#include "rectangle.h"
36 0da71265 Michael Niedermayer
37 e5017ab8 Laurent Aimar
#include "cabac.h"
38 52cb7981 Jeff Downs
#ifdef ARCH_X86
39 a6493a8f Diego Biurrun
#include "x86/h264_i386.h"
40 52cb7981 Jeff Downs
#endif
41 e5017ab8 Laurent Aimar
42 2848ce84 Loren Merritt
//#undef NDEBUG
43 0da71265 Michael Niedermayer
#include <assert.h>
44
45 2ddcf84b Jeff Downs
/**
 * Value of Picture.reference when Picture is not a reference picture, but
 * is held for delayed output.
 */
#define DELAYED_PIC_REF 4
50
51 0da71265 Michael Niedermayer
static VLC coeff_token_vlc[4];
52 910e3668 Art Clarke
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
53
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
54
55 0da71265 Michael Niedermayer
static VLC chroma_dc_coeff_token_vlc;
56 910e3668 Art Clarke
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
57
static const int chroma_dc_coeff_token_vlc_table_size = 256;
58 0da71265 Michael Niedermayer
59
static VLC total_zeros_vlc[15];
60 910e3668 Art Clarke
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
61
static const int total_zeros_vlc_tables_size = 512;
62
63 0da71265 Michael Niedermayer
static VLC chroma_dc_total_zeros_vlc[3];
64 910e3668 Art Clarke
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
65
static const int chroma_dc_total_zeros_vlc_tables_size = 8;
66 0da71265 Michael Niedermayer
67
static VLC run_vlc[6];
68 910e3668 Art Clarke
static VLC_TYPE run_vlc_tables[6][8][2];
69
static const int run_vlc_tables_size = 8;
70
71 0da71265 Michael Niedermayer
static VLC run7_vlc;
72 910e3668 Art Clarke
static VLC_TYPE run7_vlc_table[96][2];
73
static const int run7_vlc_table_size = 96;
74 0da71265 Michael Niedermayer
75 8b82a956 Michael Niedermayer
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
76
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
77 6ba71fc4 Loïc Le Loarer
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
78 3e20143e Loren Merritt
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 9c0e4624 Michael Niedermayer
static Picture * remove_long(H264Context *h, int i, int ref_mask);
80 8b82a956 Michael Niedermayer
81 849f1035 Måns Rullgård
/**
 * Packs the low 16 bits of two values into one 32-bit word.
 *
 * The halves are ordered so that, when the result is stored as a native
 * uint32_t, a occupies the first two bytes in memory and b the following
 * two, on both little-endian and big-endian (WORDS_BIGENDIAN) hosts.
 *
 * The upper half is shifted as an unsigned 32-bit value: shifting a negative
 * (or >= 0x8000) signed int left by 16 is undefined behaviour in C.
 *
 * @param a value whose low 16 bits come first in memory
 * @param b value whose low 16 bits come second in memory
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + ((uint32_t)a<<16);
#else
   return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
88
89 d9ec210b Diego Pettenò
/**
 * Remainder lookup table: rem6[i] == i % 6 for i in 0..51.
 * NOTE(review): presumably indexed by a quantiser parameter; the users of
 * this table are not visible in this part of the file.
 */
static const uint8_t rem6[52]={
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
};
92
93 d9ec210b Diego Pettenò
/**
 * Quotient lookup table: div6[i] == i / 6 for i in 0..51.
 * NOTE(review): presumably indexed by a quantiser parameter; the users of
 * this table are not visible in this part of the file.
 */
static const uint8_t div6[52]={
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
96
97 143d7f14 Paul Kendall
/**
 * Per-row index tables used by fill_caches() to pick data from the left
 * neighbour macroblock(s).
 *
 * Row selection in fill_caches():
 *   [0] default (current and left MB pair have the same field/frame coding,
 *       or no MBAFF),
 *   [1] current MB is frame coded, left pair is field coded, bottom MB,
 *   [2] current MB is frame coded, left pair is field coded, top MB,
 *   [3] current MB is field coded, left pair is frame coded.
 *
 * Entries 0..3 are used as row/block indices for intra4x4_pred_mode,
 * non_zero_count, motion vectors and reference indices; entries 4..7 are
 * used as non_zero_count indices for the column-0 cells of
 * non_zero_count_cache.
 */
static const int left_block_options[4][8]={
    {0,1,2,3,7,10,8,11},
    {2,2,3,3,8,11,8,11},
    {0,0,1,1,7,10,7,10},
    {0,2,0,2,7,10,7,10}
};
103 acd8d10f Panagiotis Issaris
104 70abb407 Loren Merritt
/**
 * Loads information about the neighbours of the current macroblock
 * (h->mb_xy) into the per-macroblock caches of h.
 *
 * Depending on the macroblock type and the flags, this writes:
 *  - h->top_mb_xy and h->left_mb_xy[],
 *  - the *_samples_available masks and intra4x4_pred_mode_cache
 *    (intra macroblocks, only when !for_deblock),
 *  - non_zero_count_cache,
 *  - top_cbp / left_cbp (CABAC only),
 *  - mv_cache, ref_cache, mvd_cache and direct_cache (inter/direct MBs),
 *  - neighbor_transform_size.
 *
 * @param h           decoder context; h->mb_xy, s->mb_x and s->mb_y must
 *                    describe the current macroblock
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock nonzero when the caches are filled for the loop filter.
 *                    In that case neighbours in other slices are used too
 *                    (anything with slice_table < 0xFFFF), top-left/top-right
 *                    are ignored, and the function returns early -- writing
 *                    nothing -- when not in MBAFF and either slice_num is 1
 *                    or the top neighbour is in the same slice.
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1; // used as a bit mask below: -1 keeps the extra offset, 0 drops it
    int i;

    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
        return;

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    if(FRAME_MBAFF){
        /* In MBAFF the neighbour positions depend on whether the current
         * macroblock and the neighbouring macroblock pairs are field or
         * frame coded, and on whether this is the top or bottom MB of its pair. */
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            top_xy -= s->mb_stride;
        }
        if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
            topleft_xy -= s->mb_stride;
        } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
        }
        if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
            topright_xy -= s->mb_stride;
        }
        if (left_mb_field_flag != curr_mb_field_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
                left_block = left_block_options[3];
            } else {
                if (bottom) {
                    left_block = left_block_options[1];
                } else {
                    left_block= left_block_options[2];
                }
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        topleft_type = 0;
        topright_type = 0;
        /* for deblocking a neighbour counts whenever its slice_table entry is below 0xFFFF */
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            int list;
            for(list=0; list<h->list_count; list++){
                //These values were changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    /* replicate each of the two 8-bit ref indices of an 8x8 row
                     * into two adjacent cache bytes, and write two cache rows at once */
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                }
            }
        }
    }else{
        /* for decoding only neighbours in the current slice are available */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(IS_INTRA(mb_type)){
            /* with constrained_intra_pred a neighbour only counts when (type & IS_INTRA(-1))
             * is nonzero; otherwise any nonzero type counts */
            int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
            h->topleft_samples_available=
            h->top_samples_available=
            h->left_samples_available= 0xFFFF;
            h->topright_samples_available= 0xEEEA;

            if(!(top_type & type_mask)){
                h->topleft_samples_available= 0xB3FF;
                h->top_samples_available= 0x33FF;
                h->topright_samples_available= 0x26EA;
            }
            if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
                if(IS_INTERLACED(mb_type)){
                    if(!(left_type[0] & type_mask)){
                        h->topleft_samples_available&= 0xDFFF;
                        h->left_samples_available&= 0x5FFF;
                    }
                    if(!(left_type[1] & type_mask)){
                        h->topleft_samples_available&= 0xFF5F;
                        h->left_samples_available&= 0xFF5F;
                    }
                }else{
                    /* frame MB next to a field pair: both MBs of the left pair must be usable */
                    int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                    ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                    assert(left_xy[0] == left_xy[1]);
                    if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                        h->topleft_samples_available&= 0xDF5F;
                        h->left_samples_available&= 0x5F5F;
                    }
                }
            }else{
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }

            if(!(topleft_type & type_mask))
                h->topleft_samples_available&= 0x7FFF;

            if(!(topright_type & type_mask))
                h->topright_samples_available&= 0xFBFF;

            if(IS_INTRA4x4(mb_type)){
                /* prediction modes of the neighbouring 4x4 blocks; for a neighbour that is
                 * not intra4x4: -1 when it fails the type_mask test, 2 otherwise */
                if(IS_INTRA4x4(top_type)){
                    h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                    h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                    h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                    h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
                }else{
                    const int pred= (top_type & type_mask) ? 2 : -1;

                    h->intra4x4_pred_mode_cache[4+8*0]=
                    h->intra4x4_pred_mode_cache[5+8*0]=
                    h->intra4x4_pred_mode_cache[6+8*0]=
                    h->intra4x4_pred_mode_cache[7+8*0]= pred;
                }
                for(i=0; i<2; i++){
                    if(IS_INTRA4x4(left_type[i])){
                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                    }else{
                        const int pred= (left_type[i] & type_mask) ? 2 : -1;

                        h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                        h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                    }
                }
            }
        }
    }


/*
Cells of non_zero_count_cache (index = column + 8*row) filled below;
T = taken from the top neighbour, L = taken from a left neighbour:
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        /* top neighbour unavailable: 0 for CABAC inter MBs, 64 otherwise */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    }

    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            /* left neighbour unavailable: same default as for the top neighbour */
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        }
    }

    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
            /* bit 1: cbp bit of the left MB selected through left_block[0] */
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        if (left_type[1]) {
            /* bit 3: cbp bit of the second left MB selected through left_block[2] */
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            /* a list this MB does not use only needs its caches when deblocking is enabled */
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                continue;
            }
            h->mv_cache_clean[list]= 0;

            /* top neighbour: bottom row of its motion vectors and reference indices */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                /* the same marker byte is written to all four top ref_cache entries */
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            /* left neighbour(s): right column, two cache rows per left_xy[] entry */
            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            /* the remaining entries are skipped for deblocking and for
             * non-MBAFF temporal direct macroblocks */
            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
                continue;

            if(USES_LIST(topleft_type, list)){
                /* topleft_partition is -1 (keep the extra row offset) or 0 (drop it) */
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type_nos == FF_B_TYPE){
                    /* direct flags: clear the 4x4 area of the current MB, then load the neighbours' flags */
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                }
            }

            if(FRAME_MBAFF){
                /* Rescale cached neighbour data whose field/frame coding differs
                 * from the current macroblock. MAP_MVS applies MAP_F2F to every
                 * neighbour cache entry loaded above. */
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
                    /* current MB is a field MB, neighbour is a frame MB:
                     * double the ref index, halve the vertical mv/mvd */
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }else{
                    /* current MB is a frame MB, neighbour is a field MB:
                     * halve the ref index, double the vertical mv/mvd.
                     * Multiplication is used instead of a left shift because
                     * the components may be negative and shifting a negative
                     * value left is undefined behaviour. */
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] *= 2;\
                        h->mvd_cache[list][idx][1] *= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }

    /* number of neighbours (top, first left) that use the 8x8 transform: 0, 1 or 2 */
    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}
546
547
/**
 * Saves seven entries of intra4x4_pred_mode_cache into the
 * intra4x4_pred_mode table row of the current macroblock (h->mb_xy).
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    /* cache position stored into slot k of the table row:
     * column 7 of rows 1..4 first, then columns 4..6 of row 4
     * (the cache is addressed as x + 8*y) */
    static const uint8_t cache_pos[7]= {
        7+8*1, 7+8*2, 7+8*3, 7+8*4,
        4+8*4, 5+8*4, 6+8*4,
    };
    int k;

    for(k=0; k<7; k++)
        h->intra4x4_pred_mode[h->mb_xy][k]= h->intra4x4_pred_mode_cache[cache_pos[k]];
}
558
559
/**
560
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
561
 */
562
static inline int check_intra4x4_pred_mode(H264Context *h){
563
    MpegEncContext * const s = &h->s;
564
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
565
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
566
    int i;
567 115329f1 Diego Biurrun
568 0da71265 Michael Niedermayer
    if(!(h->top_samples_available&0x8000)){
569
        for(i=0; i<4; i++){
570
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
571
            if(status<0){
572 9b879566 Michel Bardiaux
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
573 0da71265 Michael Niedermayer
                return -1;
574
            } else if(status){
575
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
576
            }
577
        }
578
    }
579 115329f1 Diego Biurrun
580 d1d10e91 Michael Niedermayer
    if((h->left_samples_available&0x8888)!=0x8888){
581
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
582 0da71265 Michael Niedermayer
        for(i=0; i<4; i++){
583 d1d10e91 Michael Niedermayer
            if(!(h->left_samples_available&mask[i])){
584 0da71265 Michael Niedermayer
            int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
585
            if(status<0){
586 9b879566 Michel Bardiaux
                av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
587 0da71265 Michael Niedermayer
                return -1;
588
            } else if(status){
589
                h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
590
            }
591 d1d10e91 Michael Niedermayer
            }
592 0da71265 Michael Niedermayer
        }
593
    }
594
595
    return 0;
596
} //FIXME cleanup like next
597
598
/**
599
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
600
 */
601
static inline int check_intra_pred_mode(H264Context *h, int mode){
602
    MpegEncContext * const s = &h->s;
603
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
604
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
605 115329f1 Diego Biurrun
606 43ff0714 Michael Niedermayer
    if(mode > 6U) {
607 5175b937 Loic Le Loarer
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
608 7440fe83 Michael Niedermayer
        return -1;
609 5175b937 Loic Le Loarer
    }
610 115329f1 Diego Biurrun
611 0da71265 Michael Niedermayer
    if(!(h->top_samples_available&0x8000)){
612
        mode= top[ mode ];
613
        if(mode<0){
614 9b879566 Michel Bardiaux
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 0da71265 Michael Niedermayer
            return -1;
616
        }
617
    }
618 115329f1 Diego Biurrun
619 d1d10e91 Michael Niedermayer
    if((h->left_samples_available&0x8080) != 0x8080){
620 0da71265 Michael Niedermayer
        mode= left[ mode ];
621 d1d10e91 Michael Niedermayer
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
622
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
623
        }
624 0da71265 Michael Niedermayer
        if(mode<0){
625 9b879566 Michel Bardiaux
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
626 0da71265 Michael Niedermayer
            return -1;
627 115329f1 Diego Biurrun
        }
628 0da71265 Michael Niedermayer
    }
629
630
    return mode;
631
}
632
633
/**
634
 * gets the predicted intra4x4 prediction mode.
635
 */
636
static inline int pred_intra_mode(H264Context *h, int n){
637
    const int index8= scan8[n];
638
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640
    const int min= FFMIN(left, top);
641
642 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 0da71265 Michael Niedermayer
644
    if(min<0) return DC_PRED;
645
    else      return min;
646
}
647
648
/**
 * Saves thirteen entries of non_zero_count_cache into the non_zero_count
 * table row of the current macroblock (h->mb_xy).
 */
static inline void write_back_non_zero_count(H264Context *h){
    /* cache position (x + 8*y) stored into slot k of the table row */
    static const uint8_t cache_pos[13]= {
        7+8*1, 7+8*2, 7+8*3, 7+8*4, 4+8*4, 5+8*4, 6+8*4, /* slots  0..6  */
        2+8*1, 2+8*2, 1+8*2,                             /* slots  7..9  */
        2+8*4, 2+8*5, 1+8*5,                             /* slots 10..12 */
    };
    int k;

    for(k=0; k<13; k++)
        h->non_zero_count[h->mb_xy][k]= h->non_zero_count_cache[cache_pos[k]];
}
667
668
/**
669 1412060e Diego Biurrun
 * gets the predicted number of non-zero coefficients.
670 0da71265 Michael Niedermayer
 * @param n block index
671
 */
672
static inline int pred_non_zero_count(H264Context *h, int n){
673
    const int index8= scan8[n];
674
    const int left= h->non_zero_count_cache[index8 - 1];
675
    const int top = h->non_zero_count_cache[index8 - 8];
676
    int i= left + top;
677 115329f1 Diego Biurrun
678 0da71265 Michael Niedermayer
    if(i<64) i= (i+1)>>1;
679
680 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
681 0da71265 Michael Niedermayer
682
    return i&31;
683
}
684
685 1924f3ce Michael Niedermayer
/**
 * Fetches the diagonal (top-right, falling back to top-left) neighbour used
 * for motion vector prediction.
 * @param C          receives a pointer to the selected motion vector
 * @param i          position of the block in the 8-wide cache
 * @param list       reference list
 * @param part_width width of the partition in cache entries
 * @return the reference index belonging to *C
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    MpegEncContext *s = &h->s;
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        /* spare cache entry that holds the rescaled vector handed back via C */
        int16_t * const scratch  = h->mv_cache[list][scan8[0]-2];
        const int16_t *mv;

/* Loads the vector at 4x4 position (X4,Y4) of the current picture into the
 * scratch entry, applies MV_OP to its vertical component and returns the
 * matching reference index modified by REF_OP. Always returns. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                scratch[0] = mv[0];\
                scratch[1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

        *(uint32_t*)scratch = 0;
        *C = scratch;

        if(!MB_FIELD && (s->mb_y&1) && i < scan8[0]+8
           && topright_ref != PART_NOT_AVAILABLE){
            const int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);

            if(IS_INTERLACED(mb_types[topright_xy])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }

        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            const int left_interlaced = IS_INTERLACED(mb_types[h->left_mb_xy[0]]);

            if(!MB_FIELD){
                if(left_interlaced){
                    SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
                }
            }else if(!left_interlaced && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    if(topright_ref == PART_NOT_AVAILABLE){
        tprintf(s->avctx, "topright MV not available\n");

        /* fall back to the top-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }

    *C= h->mv_cache[list][ i - 8 + part_width ];
    return topright_ref;
}
741
742 0da71265 Michael Niedermayer
/**
743
 * gets the predicted MV.
744
 * @param n the block index
745
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
746
 * @param mx the x component of the predicted motion vector
747
 * @param my the y component of the predicted motion vector
748
 */
749
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
750
    const int index8= scan8[n];
751
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
752
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
753
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
754
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
755 1924f3ce Michael Niedermayer
    const int16_t * C;
756
    int diagonal_ref, match_count;
757
758 0da71265 Michael Niedermayer
    assert(part_width==1 || part_width==2 || part_width==4);
759 1924f3ce Michael Niedermayer
760 0da71265 Michael Niedermayer
/* mv_cache
761 115329f1 Diego Biurrun
  B . . A T T T T
762 0da71265 Michael Niedermayer
  U . . L . . , .
763
  U . . L . . . .
764
  U . . L . . , .
765
  . . . L . . . .
766
*/
767 1924f3ce Michael Niedermayer
768
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
769
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
770 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
771 1924f3ce Michael Niedermayer
    if(match_count > 1){ //most common
772
        *mx= mid_pred(A[0], B[0], C[0]);
773
        *my= mid_pred(A[1], B[1], C[1]);
774
    }else if(match_count==1){
775
        if(left_ref==ref){
776
            *mx= A[0];
777 115329f1 Diego Biurrun
            *my= A[1];
778 1924f3ce Michael Niedermayer
        }else if(top_ref==ref){
779
            *mx= B[0];
780 115329f1 Diego Biurrun
            *my= B[1];
781 0da71265 Michael Niedermayer
        }else{
782 1924f3ce Michael Niedermayer
            *mx= C[0];
783 115329f1 Diego Biurrun
            *my= C[1];
784 0da71265 Michael Niedermayer
        }
785
    }else{
786 1924f3ce Michael Niedermayer
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
787 0da71265 Michael Niedermayer
            *mx= A[0];
788 115329f1 Diego Biurrun
            *my= A[1];
789 0da71265 Michael Niedermayer
        }else{
790 1924f3ce Michael Niedermayer
            *mx= mid_pred(A[0], B[0], C[0]);
791
            *my= mid_pred(A[1], B[1], C[1]);
792 0da71265 Michael Niedermayer
        }
793
    }
794 115329f1 Diego Biurrun
795 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
796 0da71265 Michael Niedermayer
}
797
798
/**
799
 * gets the directionally predicted 16x8 MV.
800
 * @param n the block index
801
 * @param mx the x component of the predicted motion vector
802
 * @param my the y component of the predicted motion vector
803
 */
804
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
805
    if(n==0){
806
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
807
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
808
809 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
810 115329f1 Diego Biurrun
811 0da71265 Michael Niedermayer
        if(top_ref == ref){
812
            *mx= B[0];
813
            *my= B[1];
814
            return;
815
        }
816
    }else{
817
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
818
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
819 115329f1 Diego Biurrun
820 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
821 0da71265 Michael Niedermayer
822
        if(left_ref == ref){
823
            *mx= A[0];
824
            *my= A[1];
825
            return;
826
        }
827
    }
828
829
    //RARE
830
    pred_motion(h, n, 4, list, ref, mx, my);
831
}
832
833
/**
834
 * gets the directionally predicted 8x16 MV.
835
 * @param n the block index
836
 * @param mx the x component of the predicted motion vector
837
 * @param my the y component of the predicted motion vector
838
 */
839
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
840
    if(n==0){
841
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
842
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
843 115329f1 Diego Biurrun
844 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
845 0da71265 Michael Niedermayer
846
        if(left_ref == ref){
847
            *mx= A[0];
848
            *my= A[1];
849
            return;
850
        }
851
    }else{
852 1924f3ce Michael Niedermayer
        const int16_t * C;
853
        int diagonal_ref;
854
855
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
856 115329f1 Diego Biurrun
857 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
858 0da71265 Michael Niedermayer
859 115329f1 Diego Biurrun
        if(diagonal_ref == ref){
860 0da71265 Michael Niedermayer
            *mx= C[0];
861
            *my= C[1];
862
            return;
863
        }
864
    }
865
866
    //RARE
867
    pred_motion(h, n, 2, list, ref, mx, my);
868
}
869
870
/**
 * Gets the motion vector of a skipped P macroblock.
 * The vector is zero when the top or left neighbour is unavailable, or when
 * either of them uses list-0 reference 0 with an all-zero vector; otherwise
 * the regular 16x16 prediction for reference 0 is returned.
 * @param mx the x component of the resulting motion vector
 * @param my the y component of the resulting motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
    int zero_mv;

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    zero_mv= top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE;
    /* both vector components are tested at once through a 32 bit load */
    if(!zero_mv && top_ref == 0)
        zero_mv= *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0;
    if(!zero_mv && left_ref == 0)
        zero_mv= *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0;

    if(zero_mv){
        *mx = *my = 0;
        return;
    }

    pred_motion(h, 0, 4, 0, 0, mx, my);
}
888
889 8b1fd554 Michael Niedermayer
/**
 * Computes the distance scale factor for list-0 reference i.
 * @param poc  POC of the current picture/field
 * @param poc1 POC of the first list-1 reference
 * @param i    index into h->ref_list[0]
 * @return 256 when both references share the same POC (after clipping the
 *         difference) or the list-0 reference is long term, otherwise the
 *         scaled POC distance ratio clipped to -1024..1023
 */
static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
    const int poc0 = h->ref_list[0][i].poc;
    const int td   = av_clip(poc1 - poc0, -128, 127);
    int tb, tx;

    if(td == 0 || h->ref_list[0][i].long_ref)
        return 256;

    tb = av_clip(poc - poc0, -128, 127);
    tx = (16384 + (FFABS(td) >> 1)) / td;

    return av_clip((tb*tx + 32) >> 6, -1024, 1023);
}
900
901 5ad984c9 Loren Merritt
/**
 * Fills h->dist_scale_factor and h->dist_scale_factor_field with the scale
 * factors of all list-0 references, using get_scale_factor().
 * The per-field table is computed from ref_list[0] entries 16 and up and is
 * stored at index i^field.
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const cur      = s->current_picture_ptr;
    const int cur_poc  = cur->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int col_poc  = h->ref_list[1][0].poc;
    int i, field;

    for(field=0; field<2; field++){
        const int cur_field_poc = cur->field_poc[field];
        const int col_field_poc = h->ref_list[1][0].field_poc[field];

        for(i=0; i < 2*h->ref_count[0]; i++)
            h->dist_scale_factor_field[field][i^field] = get_scale_factor(h, cur_field_poc, col_field_poc, i+16);
    }

    for(i=0; i<h->ref_count[0]; i++)
        h->dist_scale_factor[i] = get_scale_factor(h, cur_poc, col_poc, i);
}
917 f4d3382d Michael Niedermayer
918
/**
 * Builds map[list], which translates reference indices recorded in the first
 * list-1 reference picture into indices of the current reference list.
 * @param map      table to fill; only row map[list] is written
 * @param list     reference list
 * @param field    field parity used for the current side
 * @param colfield selects which ref_count/ref_poc set of the list-1 reference is read
 * @param mbafi    non-zero to match against ref_list entries 16 and up
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1     = &h->ref_list[1][0];
    const int first  = mbafi ? 16                      : 0;
    const int last   = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    const int interl = mbafi || s->picture_structure != PICT_FRAME;
    int j, old_ref, rfield;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if(!interl){
                poc |= 3;
            }else if((poc&3) == 3){ //FIXME store all MBAFF references so this isn't needed
                poc= (poc&~3) + rfield + 1;
            }

            /* look for the first current reference carrying the same key */
            for(j=first; j<last; j++){
                int cur_ref;

                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) != poc)
                    continue;

                cur_ref= mbafi ? (j-16)^field : j;
                map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                if(rfield == field)
                    map[list][old_ref] = cur_ref;
                break;
            }
        }
    }
}
950
951 2f944356 Loren Merritt
/**
 * Records the reference lists of the current slice in the current picture
 * (count plus a 4*frame_num + (reference&3) key per entry) and, for B
 * pictures that do not use spatial direct prediction, builds the col->list
 * maps through fill_colmap().
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1     = &h->ref_list[1][0];
    Picture * const cur      = s->current_picture_ptr;
    const int sidx     = (s->picture_structure&1)^1;
    const int ref1sidx = (ref1->reference&1)^1;
    int list, j, field;

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++){
            const Picture * const ref = &h->ref_list[list][j];

            cur->ref_poc[sidx][list][j] = 4*ref->frame_num + (ref->reference&3);
        }
    }

    /* frame pictures: duplicate slot 0 into slot 1 */
    if(s->picture_structure == PICT_FRAME){
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the maps are only built for B pictures without spatial direct prediction */
    if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
        return;

    for(list=0; list<2; list++){
        fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
        for(field=0; field<2; field++)
            fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
    }
}
981 5ad984c9 Loren Merritt
982
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
983
    MpegEncContext * const s = &h->s;
984 d00eac6c Michael Niedermayer
    int b8_stride = h->b8_stride;
985
    int b4_stride = h->b_stride;
986
    int mb_xy = h->mb_xy;
987
    int mb_type_col[2];
988
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
989
    const int8_t *l1ref0, *l1ref1;
990 5ad984c9 Loren Merritt
    const int is_b8x8 = IS_8X8(*mb_type);
991 88e7a4d1 Michael Niedermayer
    unsigned int sub_mb_type;
992 5ad984c9 Loren Merritt
    int i8, i4;
993
994 5d18eaad Loren Merritt
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
995 d00eac6c Michael Niedermayer
996
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
997 53c193a9 Michael Niedermayer
        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
998 471341a7 Michael Niedermayer
            int cur_poc = s->current_picture_ptr->poc;
999
            int *col_poc = h->ref_list[1]->field_poc;
1000
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1002
            b8_stride = 0;
1003 60c9b24d Michael Niedermayer
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004 d00eac6c Michael Niedermayer
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005
            mb_xy += s->mb_stride*fieldoff;
1006
        }
1007
        goto single_col;
1008
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1009
        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
1010
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1013
            b8_stride *= 3;
1014
            b4_stride *= 6;
1015
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1018
                && !is_b8x8){
1019
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1021
            }else{
1022
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1024
            }
1025
        }else{                                           //     AFR/FR    -> AFR/FR
1026
single_col:
1027
            mb_type_col[0] =
1028
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1029 cc615d2c Michael Niedermayer
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030
                /* FIXME save sub mb types from previous frames (or derive from MVs)
1031
                * so we know exactly what block size to use */
1032
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1037
            }else{
1038
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1040
            }
1041 d00eac6c Michael Niedermayer
        }
1042 5ad984c9 Loren Merritt
    }
1043
1044 7d54ecc9 Michael Niedermayer
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1048 9b5fab91 Michael Niedermayer
    if(!b8_stride){
1049
        if(s->mb_y&1){
1050
            l1ref0 += h->b8_stride;
1051
            l1ref1 += h->b8_stride;
1052
            l1mv0  +=  2*b4_stride;
1053
            l1mv1  +=  2*b4_stride;
1054
        }
1055 d00eac6c Michael Niedermayer
    }
1056 115329f1 Diego Biurrun
1057 5ad984c9 Loren Merritt
    if(h->direct_spatial_mv_pred){
1058
        int ref[2];
1059
        int mv[2][2];
1060
        int list;
1061
1062 5d18eaad Loren Merritt
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1063
1064 5ad984c9 Loren Merritt
        /* ref = min(neighbors) */
1065
        for(list=0; list<2; list++){
1066
            int refa = h->ref_cache[list][scan8[0] - 1];
1067
            int refb = h->ref_cache[list][scan8[0] - 8];
1068
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069 9bec77fe Paul Kendall
            if(refc == PART_NOT_AVAILABLE)
1070 5ad984c9 Loren Merritt
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
1071 29d05ebc Michael Niedermayer
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1072 5ad984c9 Loren Merritt
            if(ref[list] < 0)
1073
                ref[list] = -1;
1074
        }
1075
1076
        if(ref[0] < 0 && ref[1] < 0){
1077
            ref[0] = ref[1] = 0;
1078
            mv[0][0] = mv[0][1] =
1079
            mv[1][0] = mv[1][1] = 0;
1080
        }else{
1081
            for(list=0; list<2; list++){
1082
                if(ref[list] >= 0)
1083
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1084
                else
1085
                    mv[list][0] = mv[list][1] = 0;
1086
            }
1087
        }
1088
1089
        if(ref[1] < 0){
1090 50b3ab0f Loren Merritt
            if(!is_b8x8)
1091
                *mb_type &= ~MB_TYPE_L1;
1092
            sub_mb_type &= ~MB_TYPE_L1;
1093 5ad984c9 Loren Merritt
        }else if(ref[0] < 0){
1094 50b3ab0f Loren Merritt
            if(!is_b8x8)
1095
                *mb_type &= ~MB_TYPE_L0;
1096
            sub_mb_type &= ~MB_TYPE_L0;
1097 5ad984c9 Loren Merritt
        }
1098
1099 d00eac6c Michael Niedermayer
        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100 50b3ab0f Loren Merritt
            for(i8=0; i8<4; i8++){
1101
                int x8 = i8&1;
1102
                int y8 = i8>>1;
1103
                int xy8 = x8+y8*b8_stride;
1104
                int xy4 = 3*x8+y8*b4_stride;
1105
                int a=0, b=0;
1106
1107
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1108
                    continue;
1109
                h->sub_mb_type[i8] = sub_mb_type;
1110
1111
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1113 d00eac6c Michael Niedermayer
                if(!IS_INTRA(mb_type_col[y8])
1114 50b3ab0f Loren Merritt
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1116
                    if(ref[0] > 0)
1117
                        a= pack16to32(mv[0][0],mv[0][1]);
1118
                    if(ref[1] > 0)
1119
                        b= pack16to32(mv[1][0],mv[1][1]);
1120
                }else{
1121
                    a= pack16to32(mv[0][0],mv[0][1]);
1122
                    b= pack16to32(mv[1][0],mv[1][1]);
1123
                }
1124
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1126
            }
1127
        }else if(IS_16X16(*mb_type)){
1128 d19f5acb Michael Niedermayer
            int a=0, b=0;
1129
1130 cec93959 Loren Merritt
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132 d00eac6c Michael Niedermayer
            if(!IS_INTRA(mb_type_col[0])
1133 c26abfa5 Diego Biurrun
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1135 bf4e3bd2 Måns Rullgård
                       && (h->x264_build>33 || !h->x264_build)))){
1136 5ad984c9 Loren Merritt
                if(ref[0] > 0)
1137 d19f5acb Michael Niedermayer
                    a= pack16to32(mv[0][0],mv[0][1]);
1138 5ad984c9 Loren Merritt
                if(ref[1] > 0)
1139 d19f5acb Michael Niedermayer
                    b= pack16to32(mv[1][0],mv[1][1]);
1140 5ad984c9 Loren Merritt
            }else{
1141 d19f5acb Michael Niedermayer
                a= pack16to32(mv[0][0],mv[0][1]);
1142
                b= pack16to32(mv[1][0],mv[1][1]);
1143 5ad984c9 Loren Merritt
            }
1144 d19f5acb Michael Niedermayer
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1146 5ad984c9 Loren Merritt
        }else{
1147
            for(i8=0; i8<4; i8++){
1148
                const int x8 = i8&1;
1149
                const int y8 = i8>>1;
1150 115329f1 Diego Biurrun
1151 5ad984c9 Loren Merritt
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1152
                    continue;
1153
                h->sub_mb_type[i8] = sub_mb_type;
1154 115329f1 Diego Biurrun
1155 5ad984c9 Loren Merritt
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157 cec93959 Loren Merritt
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1159 115329f1 Diego Biurrun
1160 5ad984c9 Loren Merritt
                /* col_zero_flag */
1161 2ccd25d0 Michael Niedermayer
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1162
                                              || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163 bf4e3bd2 Måns Rullgård
                                                  && (h->x264_build>33 || !h->x264_build)))){
1164 2ccd25d0 Michael Niedermayer
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165 f1f17e54 Loren Merritt
                    if(IS_SUB_8X8(sub_mb_type)){
1166 2ccd25d0 Michael Niedermayer
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167 c26abfa5 Diego Biurrun
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1168 f1f17e54 Loren Merritt
                            if(ref[0] == 0)
1169
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1170
                            if(ref[1] == 0)
1171
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1172
                        }
1173
                    }else
1174 5ad984c9 Loren Merritt
                    for(i4=0; i4<4; i4++){
1175 2ccd25d0 Michael Niedermayer
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176 c26abfa5 Diego Biurrun
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 5ad984c9 Loren Merritt
                            if(ref[0] == 0)
1178
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1179
                            if(ref[1] == 0)
1180
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1181
                        }
1182
                    }
1183
                }
1184
            }
1185
        }
1186
    }else{ /* direct temporal mv pred */
1187 5d18eaad Loren Merritt
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188
        const int *dist_scale_factor = h->dist_scale_factor;
1189 f4d3382d Michael Niedermayer
        int ref_offset= 0;
1190 5d18eaad Loren Merritt
1191 cc615d2c Michael Niedermayer
        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1192 8b1fd554 Michael Niedermayer
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194
            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1195 cc615d2c Michael Niedermayer
        }
1196 48e025e5 Michael Niedermayer
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1197 f4d3382d Michael Niedermayer
            ref_offset += 16;
1198 48e025e5 Michael Niedermayer
1199 cc615d2c Michael Niedermayer
        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200
            /* FIXME assumes direct_8x8_inference == 1 */
1201 c210fa61 Michael Niedermayer
            int y_shift  = 2*!IS_INTERLACED(*mb_type);
1202 5d18eaad Loren Merritt
1203 cc615d2c Michael Niedermayer
            for(i8=0; i8<4; i8++){
1204
                const int x8 = i8&1;
1205
                const int y8 = i8>>1;
1206
                int ref0, scale;
1207
                const int16_t (*l1mv)[2]= l1mv0;
1208 5d18eaad Loren Merritt
1209 cc615d2c Michael Niedermayer
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1210
                    continue;
1211
                h->sub_mb_type[i8] = sub_mb_type;
1212
1213
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214
                if(IS_INTRA(mb_type_col[y8])){
1215
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1218
                    continue;
1219
                }
1220
1221
                ref0 = l1ref0[x8 + y8*b8_stride];
1222
                if(ref0 >= 0)
1223 f4d3382d Michael Niedermayer
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
1224 cc615d2c Michael Niedermayer
                else{
1225 f4d3382d Michael Niedermayer
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1226 cc615d2c Michael Niedermayer
                    l1mv= l1mv1;
1227
                }
1228
                scale = dist_scale_factor[ref0];
1229
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1230
1231
                {
1232
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233
                    int my_col = (mv_col[1]<<y_shift)/2;
1234
                    int mx = (scale * mv_col[0] + 128) >> 8;
1235
                    int my = (scale * my_col + 128) >> 8;
1236
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1237
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1238 5d18eaad Loren Merritt
                }
1239
            }
1240 cc615d2c Michael Niedermayer
            return;
1241
        }
1242 5d18eaad Loren Merritt
1243
        /* one-to-one mv scaling */
1244
1245 5ad984c9 Loren Merritt
        if(IS_16X16(*mb_type)){
1246 fda51641 Michael Niedermayer
            int ref, mv0, mv1;
1247
1248 5ad984c9 Loren Merritt
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249 d00eac6c Michael Niedermayer
            if(IS_INTRA(mb_type_col[0])){
1250 fda51641 Michael Niedermayer
                ref=mv0=mv1=0;
1251 5ad984c9 Loren Merritt
            }else{
1252 f4d3382d Michael Niedermayer
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254 5d18eaad Loren Merritt
                const int scale = dist_scale_factor[ref0];
1255 8583bef8 Michael Niedermayer
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1256 5ad984c9 Loren Merritt
                int mv_l0[2];
1257 5d18eaad Loren Merritt
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259 fda51641 Michael Niedermayer
                ref= ref0;
1260
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1262 5ad984c9 Loren Merritt
            }
1263 fda51641 Michael Niedermayer
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1266 5ad984c9 Loren Merritt
        }else{
1267
            for(i8=0; i8<4; i8++){
1268
                const int x8 = i8&1;
1269
                const int y8 = i8>>1;
1270 5d18eaad Loren Merritt
                int ref0, scale;
1271 bf4e3bd2 Måns Rullgård
                const int16_t (*l1mv)[2]= l1mv0;
1272 8583bef8 Michael Niedermayer
1273 5ad984c9 Loren Merritt
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1274
                    continue;
1275
                h->sub_mb_type[i8] = sub_mb_type;
1276 5d18eaad Loren Merritt
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277 d00eac6c Michael Niedermayer
                if(IS_INTRA(mb_type_col[0])){
1278 5ad984c9 Loren Merritt
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1281
                    continue;
1282
                }
1283 115329f1 Diego Biurrun
1284 f4d3382d Michael Niedermayer
                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1285 2f944356 Loren Merritt
                if(ref0 >= 0)
1286 5d18eaad Loren Merritt
                    ref0 = map_col_to_list0[0][ref0];
1287 8583bef8 Michael Niedermayer
                else{
1288 f4d3382d Michael Niedermayer
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1289 8583bef8 Michael Niedermayer
                    l1mv= l1mv1;
1290
                }
1291 5d18eaad Loren Merritt
                scale = dist_scale_factor[ref0];
1292 115329f1 Diego Biurrun
1293 5ad984c9 Loren Merritt
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294 f1f17e54 Loren Merritt
                if(IS_SUB_8X8(sub_mb_type)){
1295 2ccd25d0 Michael Niedermayer
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296 5d18eaad Loren Merritt
                    int mx = (scale * mv_col[0] + 128) >> 8;
1297
                    int my = (scale * mv_col[1] + 128) >> 8;
1298 f1f17e54 Loren Merritt
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1300
                }else
1301 5ad984c9 Loren Merritt
                for(i4=0; i4<4; i4++){
1302 2ccd25d0 Michael Niedermayer
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303 5ad984c9 Loren Merritt
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304 5d18eaad Loren Merritt
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306 5ad984c9 Loren Merritt
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1308
                }
1309
            }
1310
        }
1311
    }
1312
}
1313
1314 0da71265 Michael Niedermayer
/**
 * Writes the motion data of the current macroblock from the per-macroblock
 * caches back into the picture-wide tables.
 * For every list used by mb_type the motion vectors (h->mv_cache) go to
 * current_picture.motion_val and the reference indices (h->ref_cache) go to
 * current_picture.ref_index; with CABAC the motion vector differences are
 * stored in h->mvd_table as well. For 8x8 macroblocks of CABAC coded B slices
 * the direct flags of sub macroblocks 1..3 are stored in h->direct_table.
 * @param mb_type type flags of the current macroblock
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mv_stride  = h->b_stride;
    const int ref_stride = h->b8_stride;
    const int b_xy = 4*s->mb_x + 4*s->mb_y*mv_stride;
    const int b8_xy= 2*s->mb_x + 2*s->mb_y*ref_stride;
    int list, row;

    /* list 0 is skipped by the loop below when unused, so mark it here */
    if(!USES_LIST(mb_type, 0)){
        fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, ref_stride, (uint8_t)LIST_NOT_USED, 1);
    }

    for(list=0; list<h->list_count; list++){
        if(USES_LIST(mb_type, list)){
            int8_t * const ref_dst = &s->current_picture.ref_index[list][b8_xy];

            /* motion vectors, two 64 bit stores (= 4 vectors) per row */
            for(row=0; row<4; row++){
                const int dst = b_xy + row*mv_stride;
                const int src = scan8[0] + 8*row;
                *(uint64_t*)s->current_picture.motion_val[list][dst + 0]= *(uint64_t*)h->mv_cache[list][src + 0];
                *(uint64_t*)s->current_picture.motion_val[list][dst + 2]= *(uint64_t*)h->mv_cache[list][src + 2];
            }

            /* motion vector differences are only kept for CABAC */
            if(h->pps.cabac){
                if(IS_SKIP(mb_type)){
                    fill_rectangle(h->mvd_table[list][b_xy], 4, 4, mv_stride, 0, 4);
                }else{
                    for(row=0; row<4; row++){
                        const int dst = b_xy + row*mv_stride;
                        const int src = scan8[0] + 8*row;
                        *(uint64_t*)h->mvd_table[list][dst + 0]= *(uint64_t*)h->mvd_cache[list][src + 0];
                        *(uint64_t*)h->mvd_table[list][dst + 2]= *(uint64_t*)h->mvd_cache[list][src + 2];
                    }
                }
            }

            /* one reference index per 8x8 block */
            ref_dst[0]             = h->ref_cache[list][scan8[0]];
            ref_dst[1]             = h->ref_cache[list][scan8[4]];
            ref_dst[ref_stride]    = h->ref_cache[list][scan8[8]];
            ref_dst[ref_stride + 1]= h->ref_cache[list][scan8[12]];
        }
    }

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac && IS_8X8(mb_type)){
        uint8_t * const direct = &h->direct_table[b8_xy];

        /* entry 0 is not written here */
        direct[1]             = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
        direct[ref_stride]    = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
        direct[ref_stride + 1]= IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
    }
}
1360
1361
/**
1362
 * Decodes a network abstraction layer unit.
1363
 * @param consumed is the number of bytes used as input
1364
 * @param length is the length of the array
1365 3b66c4c5 Kevin Baragona
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1366 115329f1 Diego Biurrun
 * @returns decoded bytes, might be src+1 if no escapes
1367 0da71265 Michael Niedermayer
 */
1368 30317501 Michael Niedermayer
static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1369 0da71265 Michael Niedermayer
    int i, si, di;
1370
    uint8_t *dst;
1371 24456882 Andreas Öman
    int bufidx;
1372 0da71265 Michael Niedermayer
1373 bb270c08 Diego Biurrun
//    src[0]&0x80;                //forbidden bit
1374 0da71265 Michael Niedermayer
    h->nal_ref_idc= src[0]>>5;
1375
    h->nal_unit_type= src[0]&0x1F;
1376
1377
    src++; length--;
1378 115329f1 Diego Biurrun
#if 0
1379 0da71265 Michael Niedermayer
    for(i=0; i<length; i++)
1380
        printf("%2X ", src[i]);
1381
#endif
1382
    for(i=0; i+1<length; i+=2){
1383
        if(src[i]) continue;
1384
        if(i>0 && src[i-1]==0) i--;
1385
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1386
            if(src[i+2]!=3){
1387
                /* startcode, so we must be past the end */
1388
                length=i;
1389
            }
1390
            break;
1391
        }
1392
    }
1393
1394
    if(i>=length-1){ //no escaped 0
1395
        *dst_length= length;
1396
        *consumed= length+1; //+1 for the header
1397 115329f1 Diego Biurrun
        return src;
1398 0da71265 Michael Niedermayer
    }
1399
1400 24456882 Andreas Öman
    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1401 d4369630 Alexander Strange
    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1402 24456882 Andreas Öman
    dst= h->rbsp_buffer[bufidx];
1403 0da71265 Michael Niedermayer
1404 ac658be5 Francois Oligny-Lemieux
    if (dst == NULL){
1405
        return NULL;
1406
    }
1407
1408 3b66c4c5 Kevin Baragona
//printf("decoding esc\n");
1409 0da71265 Michael Niedermayer
    si=di=0;
1410 115329f1 Diego Biurrun
    while(si<length){
1411 0da71265 Michael Niedermayer
        //remove escapes (very rare 1:2^22)
1412
        if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1413
            if(src[si+2]==3){ //escape
1414
                dst[di++]= 0;
1415
                dst[di++]= 0;
1416
                si+=3;
1417 c8470cc1 Michael Niedermayer
                continue;
1418 0da71265 Michael Niedermayer
            }else //next start code
1419
                break;
1420
        }
1421
1422
        dst[di++]= src[si++];
1423
    }
1424
1425 d4369630 Alexander Strange
    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1426
1427 0da71265 Michael Niedermayer
    *dst_length= di;
1428
    *consumed= si + 1;//+1 for the header
1429 90b5b51e Diego Biurrun
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1430 0da71265 Michael Niedermayer
    return dst;
1431
}
1432
1433
/**
1434
 * identifies the exact end of the bitstream
1435
 * @return the length of the trailing, or 0 if damaged
1436
 */
1437 30317501 Michael Niedermayer
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1438 0da71265 Michael Niedermayer
    int v= *src;
1439
    int r;
1440
1441 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1442 0da71265 Michael Niedermayer
1443
    for(r=1; r<9; r++){
1444
        if(v&1) return r;
1445
        v>>=1;
1446
    }
1447
    return 0;
1448
}
1449
1450
/**
1451 1412060e Diego Biurrun
 * IDCT transforms the 16 dc values and dequantizes them.
1452 0da71265 Michael Niedermayer
 * @param qp quantization parameter
1453
 */
1454 239ea04c Loren Merritt
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1455 0da71265 Michael Niedermayer
#define stride 16
1456
    int i;
1457
    int temp[16]; //FIXME check if this is a good idea
1458
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1459
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1460
1461
//memset(block, 64, 2*256);
1462
//return;
1463
    for(i=0; i<4; i++){
1464
        const int offset= y_offset[i];
1465
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1466
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1467
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1468
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1469
1470
        temp[4*i+0]= z0+z3;
1471
        temp[4*i+1]= z1+z2;
1472
        temp[4*i+2]= z1-z2;
1473
        temp[4*i+3]= z0-z3;
1474
    }
1475
1476
    for(i=0; i<4; i++){
1477
        const int offset= x_offset[i];
1478
        const int z0= temp[4*0+i] + temp[4*2+i];
1479
        const int z1= temp[4*0+i] - temp[4*2+i];
1480
        const int z2= temp[4*1+i] - temp[4*3+i];
1481
        const int z3= temp[4*1+i] + temp[4*3+i];
1482
1483 1412060e Diego Biurrun
        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1484 239ea04c Loren Merritt
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1485
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1486
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1487 0da71265 Michael Niedermayer
    }
1488
}
1489
1490 e5017ab8 Laurent Aimar
#if 0
/**
 * DCT transforms the 16 dc values.
 * Disabled code; relies on the stride macro still being defined at this point.
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    /* first pass: butterflies on the inputs, results kept in temp */
    for(i=0; i<4; i++){
        const DCTELEM * const in= block + y_offset[i];
        int * const out= temp + 4*i;
        const int sum04= in[stride*0] + in[stride*4];
        const int dif04= in[stride*0] - in[stride*4];
        const int dif15= in[stride*1] - in[stride*5];
        const int sum15= in[stride*1] + in[stride*5];

        out[0]= sum04+sum15;
        out[1]= dif04+dif15;
        out[2]= dif04-dif15;
        out[3]= sum04-sum15;
    }

    /* second pass: butterflies across temp, halve and store */
    for(i=0; i<4; i++){
        DCTELEM * const out= block + x_offset[i];
        const int * const in= temp + i;
        const int sum02= in[4*0] + in[4*2];
        const int dif02= in[4*0] - in[4*2];
        const int dif13= in[4*1] - in[4*3];
        const int sum13= in[4*1] + in[4*3];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1529
1530 0da71265 Michael Niedermayer
#undef xStride
1531
#undef stride
1532
1533 239ea04c Loren Merritt
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1534 0da71265 Michael Niedermayer
    const int stride= 16*2;
1535
    const int xStride= 16;
1536
    int a,b,c,d,e;
1537
1538
    a= block[stride*0 + xStride*0];
1539
    b= block[stride*0 + xStride*1];
1540
    c= block[stride*1 + xStride*0];
1541
    d= block[stride*1 + xStride*1];
1542
1543
    e= a-b;
1544
    a= a+b;
1545
    b= c-d;
1546
    c= c+d;
1547
1548 239ea04c Loren Merritt
    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1549
    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1550
    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1551
    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
1552 0da71265 Michael Niedermayer
}
1553
1554 e5017ab8 Laurent Aimar
#if 0
/**
 * Transforms the 2x2 chroma dc values (disabled code).
 * The four values are read from and written back to block in place.
 */
static void chroma_dc_dct_c(DCTELEM *block){
    const int row_step= 16*2;
    const int col_step= 16;
    DCTELEM * const row0= block;
    DCTELEM * const row1= block + row_step;
    const int sum0 = row0[0] + row0[col_step];
    const int diff0= row0[0] - row0[col_step];
    const int sum1 = row1[0] + row1[col_step];
    const int diff1= row1[0] - row1[col_step];

    row0[0]       = (sum0 +sum1 );
    row0[col_step]= (diff0+diff1);
    row1[0]       = (sum0 -sum1 );
    row1[col_step]= (diff0-diff1);
}
#endif
1576 0da71265 Michael Niedermayer
1577
/**
1578
 * gets the chroma qp.
1579
 */
1580 4691a77d Andreas Öman
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1581 5a78bfbd Michael Niedermayer
    return h->pps.chroma_qp_table[t][qscale];
1582 0da71265 Michael Niedermayer
}
1583
1584
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1585
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1586
                           int src_x_offset, int src_y_offset,
1587
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1588
    MpegEncContext * const s = &h->s;
1589
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1590 5d18eaad Loren Merritt
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1591 0da71265 Michael Niedermayer
    const int luma_xy= (mx&3) + ((my&3)<<2);
1592 5d18eaad Loren Merritt
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1593
    uint8_t * src_cb, * src_cr;
1594
    int extra_width= h->emu_edge_width;
1595
    int extra_height= h->emu_edge_height;
1596 0da71265 Michael Niedermayer
    int emu=0;
1597
    const int full_mx= mx>>2;
1598
    const int full_my= my>>2;
1599 fbd312fd Loren Merritt
    const int pic_width  = 16*s->mb_width;
1600 0d43dd8c Jeff Downs
    const int pic_height = 16*s->mb_height >> MB_FIELD;
1601 115329f1 Diego Biurrun
1602 0da71265 Michael Niedermayer
    if(mx&7) extra_width -= 3;
1603
    if(my&7) extra_height -= 3;
1604 115329f1 Diego Biurrun
1605
    if(   full_mx < 0-extra_width
1606
       || full_my < 0-extra_height
1607
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
1608 fbd312fd Loren Merritt
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
1609 5d18eaad Loren Merritt
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1610
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1611 0da71265 Michael Niedermayer
        emu=1;
1612
    }
1613 115329f1 Diego Biurrun
1614 5d18eaad Loren Merritt
    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1615 0da71265 Michael Niedermayer
    if(!square){
1616 5d18eaad Loren Merritt
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1617 0da71265 Michael Niedermayer
    }
1618 115329f1 Diego Biurrun
1619 87352549 Michael Niedermayer
    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1620 115329f1 Diego Biurrun
1621 0d43dd8c Jeff Downs
    if(MB_FIELD){
1622 5d18eaad Loren Merritt
        // chroma offset when predicting from a field of opposite parity
1623 2143b118 Jeff Downs
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1624 5d18eaad Loren Merritt
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1625
    }
1626
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1627
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1628
1629 0da71265 Michael Niedermayer
    if(emu){
1630 5d18eaad Loren Merritt
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1631 0da71265 Michael Niedermayer
            src_cb= s->edge_emu_buffer;
1632
    }
1633 5d18eaad Loren Merritt
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1634 0da71265 Michael Niedermayer
1635
    if(emu){
1636 5d18eaad Loren Merritt
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1637 0da71265 Michael Niedermayer
            src_cr= s->edge_emu_buffer;
1638
    }
1639 5d18eaad Loren Merritt
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1640 0da71265 Michael Niedermayer
}
1641
1642 9f2d1b4f Loren Merritt
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1643 0da71265 Michael Niedermayer
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1644
                           int x_offset, int y_offset,
1645
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1646
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1647
                           int list0, int list1){
1648
    MpegEncContext * const s = &h->s;
1649
    qpel_mc_func *qpix_op=  qpix_put;
1650
    h264_chroma_mc_func chroma_op= chroma_put;
1651 115329f1 Diego Biurrun
1652 5d18eaad Loren Merritt
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1653
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1654
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1655 0da71265 Michael Niedermayer
    x_offset += 8*s->mb_x;
1656 0d43dd8c Jeff Downs
    y_offset += 8*(s->mb_y >> MB_FIELD);
1657 115329f1 Diego Biurrun
1658 0da71265 Michael Niedermayer
    if(list0){
1659 1924f3ce Michael Niedermayer
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1660 0da71265 Michael Niedermayer
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1661
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1662
                           qpix_op, chroma_op);
1663
1664
        qpix_op=  qpix_avg;
1665
        chroma_op= chroma_avg;
1666
    }
1667
1668
    if(list1){
1669 1924f3ce Michael Niedermayer
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1670 0da71265 Michael Niedermayer
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1671
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1672
                           qpix_op, chroma_op);
1673
    }
1674
}
1675
1676 9f2d1b4f Loren Merritt
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1677
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1678
                           int x_offset, int y_offset,
1679
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1680
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1681
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1682
                           int list0, int list1){
1683
    MpegEncContext * const s = &h->s;
1684
1685 5d18eaad Loren Merritt
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1686
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1687
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1688 9f2d1b4f Loren Merritt
    x_offset += 8*s->mb_x;
1689 0d43dd8c Jeff Downs
    y_offset += 8*(s->mb_y >> MB_FIELD);
1690 115329f1 Diego Biurrun
1691 9f2d1b4f Loren Merritt
    if(list0 && list1){
1692
        /* don't optimize for luma-only case, since B-frames usually
1693
         * use implicit weights => chroma too. */
1694
        uint8_t *tmp_cb = s->obmc_scratchpad;
1695 5d18eaad Loren Merritt
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1696
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1697 9f2d1b4f Loren Merritt
        int refn0 = h->ref_cache[0][ scan8[n] ];
1698
        int refn1 = h->ref_cache[1][ scan8[n] ];
1699
1700
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1701
                    dest_y, dest_cb, dest_cr,
1702
                    x_offset, y_offset, qpix_put, chroma_put);
1703
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1704
                    tmp_y, tmp_cb, tmp_cr,
1705
                    x_offset, y_offset, qpix_put, chroma_put);
1706
1707
        if(h->use_weight == 2){
1708
            int weight0 = h->implicit_weight[refn0][refn1];
1709
            int weight1 = 64 - weight0;
1710 5d18eaad Loren Merritt
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1711
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1712
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1713 9f2d1b4f Loren Merritt
        }else{
1714 5d18eaad Loren Merritt
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1715 115329f1 Diego Biurrun
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1716 e8b56208 Loren Merritt
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1717 5d18eaad Loren Merritt
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1718 115329f1 Diego Biurrun
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1719 e8b56208 Loren Merritt
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1720 5d18eaad Loren Merritt
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1721 115329f1 Diego Biurrun
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1722 e8b56208 Loren Merritt
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1723 9f2d1b4f Loren Merritt
        }
1724
    }else{
1725
        int list = list1 ? 1 : 0;
1726
        int refn = h->ref_cache[list][ scan8[n] ];
1727
        Picture *ref= &h->ref_list[list][refn];
1728
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1729
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1730
                    qpix_put, chroma_put);
1731
1732 5d18eaad Loren Merritt
        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1733 9f2d1b4f Loren Merritt
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
1734
        if(h->use_weight_chroma){
1735 5d18eaad Loren Merritt
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1736 9f2d1b4f Loren Merritt
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1737 5d18eaad Loren Merritt
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1738 9f2d1b4f Loren Merritt
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1739
        }
1740
    }
1741
}
1742
1743
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1744
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1745
                           int x_offset, int y_offset,
1746
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1747
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1748 115329f1 Diego Biurrun
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1749 9f2d1b4f Loren Merritt
                           int list0, int list1){
1750
    if((h->use_weight==2 && list0 && list1
1751
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1752
       || h->use_weight==1)
1753
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1754
                         x_offset, y_offset, qpix_put, chroma_put,
1755
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1756
    else
1757
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1758
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1759
}
1760
1761 513fbd8e Loren Merritt
/**
 * Prefetches reference pixels for the current macroblock's motion vector.
 *
 * Fetch pixels for the estimated mv 4 macroblocks ahead; optimized for
 * 64 byte cache lines. Nothing is done when the cached reference index of
 * the first block in the given list is negative.
 */
static inline void prefetch_motion(H264Context *h, int list){
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    uint8_t **src;
    int mx, my;
    int luma_off, chroma_off;

    if(refn < 0)
        return;

    /* integer part of the quarter-sample vector, relative to the picture */
    mx = (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
    my = (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
    src = h->ref_list[list][refn].data;

    luma_off = mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
    s->dsp.prefetch(src[0]+luma_off, s->linesize, 4);

    /* both chroma planes are covered by using their distance as the stride */
    chroma_off = (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
    s->dsp.prefetch(src[1]+chroma_off, src[2]-src[1], 2);
}
1776
1777 0da71265 Michael Niedermayer
/**
 * Runs inter prediction for the current macroblock.
 *
 * Looks up the macroblock type of h->mb_xy in the current picture and issues
 * one mc_part() call per partition (16x16, 16x8, 8x16) or per sub-partition
 * of each 8x8 block. prefetch_motion() is called for list 0 before and for
 * list 1 after the compensation.
 *
 * The index into qpix_put/qpix_avg, chroma_put/chroma_avg and
 * weight_op/weight_avg selects the function variant matching the partition
 * size; x_offset/y_offset locate the partition inside the macroblock
 * (NOTE(review): they appear to be in units of 2 luma samples -- confirm in
 * mc_dir_part()).
 */
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1778
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1779 9f2d1b4f Loren Merritt
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1780
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1781 0da71265 Michael Niedermayer
    MpegEncContext * const s = &h->s;
1782 64514ee8 Alexander Strange
    const int mb_xy= h->mb_xy;
1783 0da71265 Michael Niedermayer
    const int mb_type= s->current_picture.mb_type[mb_xy];
1784 115329f1 Diego Biurrun
1785 0da71265 Michael Niedermayer
    assert(IS_INTER(mb_type));
1786 115329f1 Diego Biurrun
1787 513fbd8e Loren Merritt
    prefetch_motion(h, 0);
1788
1789 0da71265 Michael Niedermayer
    /* one partition covering the whole macroblock */
    if(IS_16X16(mb_type)){
1790
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1791
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1792 9f2d1b4f Loren Merritt
                &weight_op[0], &weight_avg[0],
1793 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1794
    /* two partitions stacked vertically: blocks 0 and 8, y_offset 0 and 4 */
    }else if(IS_16X8(mb_type)){
1795
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1796
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1797 9f2d1b4f Loren Merritt
                &weight_op[1], &weight_avg[1],
1798 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1799
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1800
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1801 9f2d1b4f Loren Merritt
                &weight_op[1], &weight_avg[1],
1802 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1803
    /* two partitions side by side: blocks 0 and 4, x_offset 0 and 4;
     * delta is 8 lines of the macroblock line size */
    }else if(IS_8X16(mb_type)){
1804 5d18eaad Loren Merritt
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1805 0da71265 Michael Niedermayer
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1806 9f2d1b4f Loren Merritt
                &weight_op[2], &weight_avg[2],
1807 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1808 5d18eaad Loren Merritt
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1809 0da71265 Michael Niedermayer
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1810 9f2d1b4f Loren Merritt
                &weight_op[2], &weight_avg[2],
1811 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1812
    }else{
1813
        int i;
1814 115329f1 Diego Biurrun
1815 0da71265 Michael Niedermayer
        assert(IS_8X8(mb_type));
1816
1817
        /* four 8x8 blocks, each with its own sub-macroblock type; the
         * prediction direction is always read from partition 0 of that type */
        for(i=0; i<4; i++){
1818
            const int sub_mb_type= h->sub_mb_type[i];
1819
            const int n= 4*i;
1820
            int x_offset= (i&1)<<2;
1821
            int y_offset= (i&2)<<1;
1822
1823
            if(IS_SUB_8X8(sub_mb_type)){
1824
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1825
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1826 9f2d1b4f Loren Merritt
                    &weight_op[3], &weight_avg[3],
1827 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1828
            }else if(IS_SUB_8X4(sub_mb_type)){
1829
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1830
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1831 9f2d1b4f Loren Merritt
                    &weight_op[4], &weight_avg[4],
1832 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1833
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1834
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1835 9f2d1b4f Loren Merritt
                    &weight_op[4], &weight_avg[4],
1836 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1837
            }else if(IS_SUB_4X8(sub_mb_type)){
1838 5d18eaad Loren Merritt
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1839 0da71265 Michael Niedermayer
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1840 9f2d1b4f Loren Merritt
                    &weight_op[5], &weight_avg[5],
1841 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1842 5d18eaad Loren Merritt
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1843 0da71265 Michael Niedermayer
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1844 9f2d1b4f Loren Merritt
                    &weight_op[5], &weight_avg[5],
1845 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1846
            }else{
1847
                int j;
1848
                assert(IS_SUB_4X4(sub_mb_type));
1849
                /* four 4x4 blocks inside this 8x8 block */
                for(j=0; j<4; j++){
1850
                    int sub_x_offset= x_offset + 2*(j&1);
1851
                    int sub_y_offset= y_offset +   (j&2);
1852
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1853
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1854 9f2d1b4f Loren Merritt
                        &weight_op[6], &weight_avg[6],
1855 0da71265 Michael Niedermayer
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1856
                }
1857
            }
1858
        }
1859
    }
1860 513fbd8e Loren Merritt
1861
    prefetch_motion(h, 1);
1862 0da71265 Michael Niedermayer
}
1863
1864 98a6fff9 Zuxy Meng
static av_cold void decode_init_vlc(void){
1865 0da71265 Michael Niedermayer
    static int done = 0;
1866
1867
    if (!done) {
1868
        int i;
1869 910e3668 Art Clarke
        int offset;
1870 0da71265 Michael Niedermayer
        done = 1;
1871
1872 910e3668 Art Clarke
        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1873
        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1874 115329f1 Diego Biurrun
        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1875 0da71265 Michael Niedermayer
                 &chroma_dc_coeff_token_len [0], 1, 1,
1876 910e3668 Art Clarke
                 &chroma_dc_coeff_token_bits[0], 1, 1,
1877
                 INIT_VLC_USE_NEW_STATIC);
1878 0da71265 Michael Niedermayer
1879 910e3668 Art Clarke
        offset = 0;
1880 0da71265 Michael Niedermayer
        for(i=0; i<4; i++){
1881 910e3668 Art Clarke
            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1882
            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1883 115329f1 Diego Biurrun
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1884 0da71265 Michael Niedermayer
                     &coeff_token_len [i][0], 1, 1,
1885 910e3668 Art Clarke
                     &coeff_token_bits[i][0], 1, 1,
1886
                     INIT_VLC_USE_NEW_STATIC);
1887
            offset += coeff_token_vlc_tables_size[i];
1888 0da71265 Michael Niedermayer
        }
1889 910e3668 Art Clarke
        /*
1890
         * This is a one time safety check to make sure that
1891
         * the packed static coeff_token_vlc table sizes
1892
         * were initialized correctly.
1893
         */
1894 37d3e066 Aurelien Jacobs
        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1895 0da71265 Michael Niedermayer
1896
        for(i=0; i<3; i++){
1897 910e3668 Art Clarke
            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1898
            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1899
            init_vlc(&chroma_dc_total_zeros_vlc[i],
1900
                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1901 0da71265 Michael Niedermayer
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
1902 910e3668 Art Clarke
                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
1903
                     INIT_VLC_USE_NEW_STATIC);
1904 0da71265 Michael Niedermayer
        }
1905
        for(i=0; i<15; i++){
1906 910e3668 Art Clarke
            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1907
            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1908
            init_vlc(&total_zeros_vlc[i],
1909
                     TOTAL_ZEROS_VLC_BITS, 16,
1910 0da71265 Michael Niedermayer
                     &total_zeros_len [i][0], 1, 1,
1911 910e3668 Art Clarke
                     &total_zeros_bits[i][0], 1, 1,
1912
                     INIT_VLC_USE_NEW_STATIC);
1913 0da71265 Michael Niedermayer
        }
1914
1915
        for(i=0; i<6; i++){
1916 910e3668 Art Clarke
            run_vlc[i].table = run_vlc_tables[i];
1917
            run_vlc[i].table_allocated = run_vlc_tables_size;
1918
            init_vlc(&run_vlc[i],
1919
                     RUN_VLC_BITS, 7,
1920 0da71265 Michael Niedermayer
                     &run_len [i][0], 1, 1,
1921 910e3668 Art Clarke
                     &run_bits[i][0], 1, 1,
1922
                     INIT_VLC_USE_NEW_STATIC);
1923 0da71265 Michael Niedermayer
        }
1924 910e3668 Art Clarke
        run7_vlc.table = run7_vlc_table,
1925
        run7_vlc.table_allocated = run7_vlc_table_size;
1926 115329f1 Diego Biurrun
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1927 0da71265 Michael Niedermayer
                 &run_len [6][0], 1, 1,
1928 910e3668 Art Clarke
                 &run_bits[6][0], 1, 1,
1929
                 INIT_VLC_USE_NEW_STATIC);
1930 0da71265 Michael Niedermayer
    }
1931
}
1932
1933
/**
 * Releases the tables owned by the context and the per-thread buffers.
 *
 * The shared tables are freed through h; for every thread context that
 * exists, the two top border buffers and the scratchpad are freed as well.
 * av_freep() resets each pointer, and slice_table (which points into
 * slice_table_base) is cleared by hand.
 */
static void free_tables(H264Context *h){
    const int nb_threads = h->s.avctx->thread_count;
    int t;

    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);

    for(t = 0; t < nb_threads; t++) {
        H264Context *ctx = h->thread_context[t];

        /* slots that were never populated are skipped */
        if(ctx){
            av_freep(&ctx->top_borders[1]);
            av_freep(&ctx->top_borders[0]);
            av_freep(&ctx->s.obmc_scratchpad);
        }
    }
}
1957
1958 239ea04c Loren Merritt
/**
 * Fills the 8x8 dequantisation tables from the PPS scaling matrices.
 *
 * For each of the 52 qp values an entry is the base coefficient times the
 * scaling matrix entry, shifted left by div6[qp]. If the second scaling
 * matrix equals the first, table 1 simply aliases buffer 0 and is not
 * computed. Positions are stored transposed when the 8x8 idct in use is not
 * the C implementation.
 */
static void init_dequant8_coeff_table(H264Context *h){
    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
    int list, qp, pos;

    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(list=0; list<2; list++){
        if(list == 1 && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
            /* identical matrices: share the table already computed */
            h->dequant8_coeff[1] = h->dequant8_buffer[0];
            return;
        }

        for(qp=0; qp<52; qp++){
            const int qp_shift = div6[qp];
            const int qp_rem   = rem6[qp];

            for(pos=0; pos<64; pos++){
                const int dst  = transpose ? (pos>>3)|((pos&7)<<3) : pos;
                const int scan = dequant8_coeff_init_scan[((pos>>1)&12) | (pos&3)];

                h->dequant8_coeff[list][qp][dst] =
                    ((uint32_t)dequant8_coeff_init[qp_rem][scan] *
                    h->pps.scaling_matrix8[list][pos]) << qp_shift;
            }
        }
    }
}
1980
1981
/**
 * Fills the six 4x4 dequantisation tables from the PPS scaling matrices.
 *
 * A table whose scaling matrix equals that of an earlier one aliases the
 * earlier buffer instead of being recomputed. Otherwise, for each of the 52
 * qp values an entry is the base coefficient times the scaling matrix
 * entry, shifted left by div6[qp] + 2. Positions are stored transposed when
 * the 4x4 idct in use is not the C implementation.
 */
static void init_dequant4_coeff_table(H264Context *h){
    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
    int list, prev, qp, pos;

    for(list=0; list<6; list++){
        int shared = 0;

        h->dequant4_coeff[list] = h->dequant4_buffer[list];

        /* look for an earlier list with the same scaling matrix */
        for(prev=0; prev<list && !shared; prev++){
            if(!memcmp(h->pps.scaling_matrix4[prev], h->pps.scaling_matrix4[list], 16*sizeof(uint8_t))){
                h->dequant4_coeff[list] = h->dequant4_buffer[prev];
                shared = 1;
            }
        }
        if(shared)
            continue;

        for(qp=0; qp<52; qp++){
            const int qp_shift = div6[qp] + 2;
            const int qp_rem   = rem6[qp];

            for(pos=0; pos<16; pos++){
                const int dst = transpose ? (pos>>2)|((pos<<2)&0xF) : pos;

                h->dequant4_coeff[list][qp][dst] =
                    ((uint32_t)dequant4_coeff_init[qp_rem][(pos&1) + ((pos>>2)&1)] *
                    h->pps.scaling_matrix4[list][pos]) << qp_shift;
            }
        }
    }
}
2005
2006
/**
 * Builds all dequantisation tables for the current parameter sets.
 *
 * The 4x4 tables are always built; the 8x8 ones only when the PPS enables
 * the 8x8 transform. When the SPS enables transform bypass, every entry of
 * the qp 0 row is then overwritten with 1<<6.
 */
static void init_dequant_tables(H264Context *h){
    int list, pos;

    init_dequant4_coeff_table(h);
    if(h->pps.transform_8x8_mode)
        init_dequant8_coeff_table(h);

    if(!h->sps.transform_bypass)
        return;

    for(list=0; list<6; list++){
        for(pos=0; pos<16; pos++)
            h->dequant4_coeff[list][0][pos] = 1<<6;
    }

    if(h->pps.transform_8x8_mode){
        for(list=0; list<2; list++){
            for(pos=0; pos<64; pos++)
                h->dequant8_coeff[list][0][pos] = 1<<6;
        }
    }
}
2021
2022
2023 0da71265 Michael Niedermayer
/**
2024
 * allocates tables.
2025 3b66c4c5 Kevin Baragona
 * needs width/height
2026 0da71265 Michael Niedermayer
 */
2027
static int alloc_tables(H264Context *h){
2028
    MpegEncContext * const s = &h->s;
2029 7bc9090a Michael Niedermayer
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2030 239ea04c Loren Merritt
    int x,y;
2031 0da71265 Michael Niedermayer
2032
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2033 e5017ab8 Laurent Aimar
2034 53c05b1e Michael Niedermayer
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2035 b735aeea Michael Niedermayer
    CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2036 5d0e4cb8 Michael Niedermayer
    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2037 0da71265 Michael Niedermayer
2038 7526ade2 Michael Niedermayer
    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2039
    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2040
    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2041
    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2042 e5017ab8 Laurent Aimar
2043 b735aeea Michael Niedermayer
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2044 5d18eaad Loren Merritt
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2045 0da71265 Michael Niedermayer
2046 a55f20bd Loren Merritt
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2047
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2048 0da71265 Michael Niedermayer
    for(y=0; y<s->mb_height; y++){
2049
        for(x=0; x<s->mb_width; x++){
2050 7bc9090a Michael Niedermayer
            const int mb_xy= x + y*s->mb_stride;
2051 0da71265 Michael Niedermayer
            const int b_xy = 4*x + 4*y*h->b_stride;
2052
            const int b8_xy= 2*x + 2*y*h->b8_stride;
2053 115329f1 Diego Biurrun
2054 0da71265 Michael Niedermayer
            h->mb2b_xy [mb_xy]= b_xy;
2055
            h->mb2b8_xy[mb_xy]= b8_xy;
2056
        }
2057
    }
2058 9f2d1b4f Loren Merritt
2059 9c6221ae Gert Vervoort
    s->obmc_scratchpad = NULL;
2060
2061 56edbd81 Loren Merritt
    if(!h->dequant4_coeff[0])
2062
        init_dequant_tables(h);
2063
2064 0da71265 Michael Niedermayer
    return 0;
2065
fail:
2066
    free_tables(h);
2067
    return -1;
2068
}
2069
2070 afebe2f7 Andreas Öman
/**
2071
 * Mimic alloc_tables(), but for every context thread.
2072
 */
2073
static void clone_tables(H264Context *dst, H264Context *src){
2074
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2075
    dst->non_zero_count           = src->non_zero_count;
2076
    dst->slice_table              = src->slice_table;
2077
    dst->cbp_table                = src->cbp_table;
2078
    dst->mb2b_xy                  = src->mb2b_xy;
2079
    dst->mb2b8_xy                 = src->mb2b8_xy;
2080
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2081
    dst->mvd_table[0]             = src->mvd_table[0];
2082
    dst->mvd_table[1]             = src->mvd_table[1];
2083
    dst->direct_table             = src->direct_table;
2084
2085
    dst->s.obmc_scratchpad = NULL;
2086
    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2087
}
2088
2089
/**
2090
 * Init context
2091
 * Allocate buffers which are not shared amongst multiple threads.
2092
 */
2093
static int context_init(H264Context *h){
2094
    CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2095
    CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2096
2097
    return 0;
2098
fail:
2099
    return -1; // free_tables will clean up for us
2100
}
2101
2102 98a6fff9 Zuxy Meng
/**
 * Initialisation of the context from the codec context in h->s.avctx.
 *
 * Copies width, height and codec id, sets up the prediction and dsp
 * function tables, and fills both PPS scaling matrix arrays with 16.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx = s->avctx;

    s->width   = avctx->width;
    s->height  = avctx->height;
    s->codec_id= avctx->codec->id;

    ff_h264_pred_init(&h->hpc, s->codec_id);

    h->dequant_coeff_pps= -1;
    s->unrestricted_mv  = 1;
    s->decode           = 1; //FIXME

    /* needed so that idct permutation is known early */
    dsputil_init(&s->dsp, avctx);

    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
2120
2121 98a6fff9 Zuxy Meng
/**
 * Decoder init callback.
 *
 * Sets the context defaults, selects the output pixel format
 * (PIX_FMT_YUVJ420P for SVQ3, PIX_FMT_YUV420P otherwise), builds the static
 * VLC tables and flags the stream as AVC when extradata is present and its
 * first byte is 1.
 *
 * @return always 0
 */
static av_cold int decode_init(AVCodecContext *avctx){
    H264Context *h= avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format     = FMT_H264;
    s->workaround_bugs= avctx->workaround_bugs;

    // set defaults
    s->quarter_sample = 1;
    s->low_delay      = 1;

    avctx->pix_fmt= (avctx->codec_id == CODEC_ID_SVQ3) ? PIX_FMT_YUVJ420P
                                                       : PIX_FMT_YUV420P;

    decode_init_vlc();

    h->is_avc = (avctx->extradata_size > 0 && avctx->extradata &&
                 *(char *)avctx->extradata == 1);
    if(h->is_avc)
        h->got_avcC = 0;

    /* the main context doubles as thread context 0 */
    h->thread_context[0] = h;
    h->outputed_poc = INT_MIN;
    h->prev_poc_msb= 1<<16;
    return 0;
}
2158
2159 af8aa846 Michael Niedermayer
/**
 * Prepares the context for decoding a new frame.
 *
 * Starts the frame in the underlying MpegEncContext and the error
 * resilience code, recomputes the block offsets for the current line
 * sizes, makes sure every thread context has a scratchpad and resets the
 * per-picture state.
 *
 * @return 0 on success; -1 if MPV_frame_start() fails or a scratchpad
 *         cannot be allocated. In the latter case the rest of the setup is
 *         still performed, so the picture is marked as non-reference before
 *         the error is reported.
 */
static int frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    int ret = 0;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;

    assert(s->linesize && s->uvlinesize);

    /* entries 0..23 use the plain line size, entries 24..47 twice that */
    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<4; i++){
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->avctx->thread_count; i++){
        H264Context *hx = h->thread_context[i];

        if(hx->s.obmc_scratchpad)
            continue;
        hx->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
        if(!hx->s.obmc_scratchpad){
            /* report the failure instead of leaving a NULL scratch buffer
             * behind for later use */
            av_log(s->avctx, AV_LOG_ERROR, "Cannot allocate scratch buffer\n");
            ret = -1;
            break;
        }
    }

    /* some macroblocks will be accessed before they're available */
    if(FRAME_MBAFF || s->avctx->thread_count > 1)
        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;
    assert(s->current_picture_ptr->long_ref==0);

    return ret;
}
2214
2215 93cc10fa Andreas Öman
/**
 * Saves the right column and the bottom row of a macroblock.
 *
 * src_y/src_cb/src_cr point at the top-left sample of the macroblock. The
 * right-most column goes into h->left_border (luma first, Cb from index
 * uvoffset+34, Cr from index uvoffset+34+18) and the bottom row into
 * h->top_borders[top_idx][s->mb_x] (16 luma bytes, then 8 Cb, then 8 Cr).
 * Chroma is skipped when gray-only decoding is active.
 *
 * With simple == 0 and FRAME_MBAFF set, the offsets, the step inside
 * left_border and the top border index depend on the row parity and on
 * MB_MBAFF.
 * NOTE(review): the rows are copied with uint64_t accesses through uint8_t
 * pointers, which relies on the buffers tolerating such accesses -- confirm.
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2216 53c05b1e Michael Niedermayer
    MpegEncContext * const s = &h->s;
2217
    int i;
2218 5f7f9719 Michael Niedermayer
    int step    = 1;
2219
    int offset  = 1;
2220
    int uvoffset= 1;
2221
    int top_idx = 1;
2222
    int skiplast= 0;
2223 115329f1 Diego Biurrun
2224 53c05b1e Michael Niedermayer
    /* step back one line so that row i+1 below is row i of the macroblock */
    src_y  -=   linesize;
2225
    src_cb -= uvlinesize;
2226
    src_cr -= uvlinesize;
2227
2228 5f7f9719 Michael Niedermayer
    if(!simple && FRAME_MBAFF){
2229
        if(s->mb_y&1){
2230
            offset  = MB_MBAFF ? 1 : 17;
2231
            uvoffset= MB_MBAFF ? 1 : 9;
2232
            if(!MB_MBAFF){
2233
                /* also keep the second-to-last row in top_borders[0] */
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2234
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2235
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2236
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2237
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2238
                }
2239
            }
2240
        }else{
2241
            if(!MB_MBAFF){
2242
                h->left_border[0]= h->top_borders[0][s->mb_x][15];
2243
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2244
                    h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2245
                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2246
                }
2247
                /* the last row of each plane is not copied into left_border */
                skiplast= 1;
2248
            }
2249
            offset  =
2250
            uvoffset=
2251
            top_idx = MB_MBAFF ? 0 : 1;
2252
        }
2253
        step= MB_MBAFF ? 2 : 1;
2254
    }
2255
2256 3b66c4c5 Kevin Baragona
    // There are two lines saved, the line above the top macroblock of a pair,
2257 6867a90b Loic Le Loarer
    // and the line above the bottom macroblock
2258 5f7f9719 Michael Niedermayer
    /* first entry: last luma sample of the previously saved top border */
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2259
    for(i=1; i<17 - skiplast; i++){
2260
        h->left_border[offset+i*step]= src_y[15+i*  linesize];
2261 53c05b1e Michael Niedermayer
    }
2262 115329f1 Diego Biurrun
2263 5f7f9719 Michael Niedermayer
    /* bottom luma row of the macroblock, 16 bytes as two 64 bit copies */
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2264
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2265 53c05b1e Michael Niedermayer
2266 87352549 Michael Niedermayer
    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2267 5f7f9719 Michael Niedermayer
        h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2268
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2269
        for(i=1; i<9 - skiplast; i++){
2270
            h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2271
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2272 53c05b1e Michael Niedermayer
        }
2273 5f7f9719 Michael Niedermayer
        /* bottom chroma rows, 8 bytes each */
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2274
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2275 53c05b1e Michael Niedermayer
    }
2276
}
2277
2278 93cc10fa Andreas Öman
/**
 * Exchanges the samples left of and above a macroblock with the saved
 * borders.
 *
 * src_y/src_cb/src_cr point at the top-left sample of the macroblock; they
 * are moved to the sample above-left of it before use. The left column is
 * swapped with h->left_border and the row above with h->top_borders, using
 * the same layout and MBAFF-dependent offsets as backup_mb_border().
 *
 * The left/top parts are only touched when the corresponding neighbour is
 * usable: with h->deblocking_filter == 2 it must belong to the same slice,
 * otherwise it only has to exist inside the picture.
 *
 * @param xchg   passed to XCHG() for most samples: non-zero swaps both
 *               ways, zero only copies the saved border into the picture.
 *               The calls that pass a literal 1 always swap both ways.
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2279 53c05b1e Michael Niedermayer
    MpegEncContext * const s = &h->s;
2280
    int temp8, i;
2281
    uint64_t temp64;
2282 b69378e2 Andreas Öman
    int deblock_left;
2283
    int deblock_top;
2284
    int mb_xy;
2285 5f7f9719 Michael Niedermayer
    int step    = 1;
2286
    int offset  = 1;
2287
    int uvoffset= 1;
2288
    int top_idx = 1;
2289
2290
    if(!simple && FRAME_MBAFF){
2291
        if(s->mb_y&1){
2292
            offset  = MB_MBAFF ? 1 : 17;
2293
            uvoffset= MB_MBAFF ? 1 : 9;
2294
        }else{
2295
            offset  =
2296
            uvoffset=
2297
            top_idx = MB_MBAFF ? 0 : 1;
2298
        }
2299
        step= MB_MBAFF ? 2 : 1;
2300
    }
2301 b69378e2 Andreas Öman
2302
    if(h->deblocking_filter == 2) {
2303 64514ee8 Alexander Strange
        mb_xy = h->mb_xy;
2304 b69378e2 Andreas Öman
        /* neighbours count only when they are in the same slice */
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2305
        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2306
    } else {
2307
        deblock_left = (s->mb_x > 0);
2308 6c805007 Michael Niedermayer
        deblock_top =  (s->mb_y > !!MB_FIELD);
2309 b69378e2 Andreas Öman
    }
2310 53c05b1e Michael Niedermayer
2311
    /* move to the sample above and to the left of the macroblock */
    src_y  -=   linesize + 1;
2312
    src_cb -= uvlinesize + 1;
2313
    src_cr -= uvlinesize + 1;
2314
2315
/* b always receives the old a; a receives the old b only if xchg is set.
 * Expands to several statements, so it must not be used as the unbraced
 * body of an if/for. */
#define XCHG(a,b,t,xchg)\
2316
t= a;\
2317
if(xchg)\
2318
    a= b;\
2319
b= t;
2320 d89dc06a Loren Merritt
2321
    if(deblock_left){
2322 5f7f9719 Michael Niedermayer
        /* start one row lower when there is no usable top neighbour */
        for(i = !deblock_top; i<16; i++){
2323
            XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2324 d89dc06a Loren Merritt
        }
2325 5f7f9719 Michael Niedermayer
        /* i == 16 here: the last entry is always swapped both ways */
        XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2326 d89dc06a Loren Merritt
    }
2327
2328
    if(deblock_top){
2329 5f7f9719 Michael Niedermayer
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2330
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2331 cad4368a Reimar Döffinger
        /* the 8 samples above the next macroblock, unless this is the last column */
        if(s->mb_x+1 < s->mb_width){
2332 5f7f9719 Michael Niedermayer
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2333 43efd19a Loren Merritt
        }
2334 53c05b1e Michael Niedermayer
    }
2335
2336 87352549 Michael Niedermayer
    /* chroma is skipped when gray-only decoding is active */
    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2337 d89dc06a Loren Merritt
        if(deblock_left){
2338 5f7f9719 Michael Niedermayer
            for(i = !deblock_top; i<8; i++){
2339
                XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2340
                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2341 d89dc06a Loren Merritt
            }
2342 5f7f9719 Michael Niedermayer
            /* i == 8 here: the last entry is always swapped both ways */
            XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2343
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2344 d89dc06a Loren Merritt
        }
2345
        if(deblock_top){
2346 5f7f9719 Michael Niedermayer
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2347
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2348 53c05b1e Michael Niedermayer
        }
2349
    }
2350
}
2351
2352 5a6a6cc7 Diego Biurrun
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2353 0da71265 Michael Niedermayer
    MpegEncContext * const s = &h->s;
2354
    const int mb_x= s->mb_x;
2355
    const int mb_y= s->mb_y;
2356 64514ee8 Alexander Strange
    const int mb_xy= h->mb_xy;
2357 0da71265 Michael Niedermayer
    const int mb_type= s->current_picture.mb_type[mb_xy];
2358
    uint8_t  *dest_y, *dest_cb, *dest_cr;
2359
    int linesize, uvlinesize /*dct_offset*/;
2360
    int i;
2361 6867a90b Loic Le Loarer
    int *block_offset = &h->block_offset[0];
2362 41e4055b Michael Niedermayer
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2363
    const int is_h264 = simple || s->codec_id == CODEC_ID_H264;
2364 36940eca Loren Merritt
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2365 ef9d1d15 Loren Merritt
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2366 0da71265 Michael Niedermayer
2367 6120a343 Michael Niedermayer
    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2368
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2369
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2370 0da71265 Michael Niedermayer
2371 a957c27b Loren Merritt
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2372
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2373
2374 bd91fee3 Alexander Strange
    if (!simple && MB_FIELD) {
2375 5d18eaad Loren Merritt
        linesize   = h->mb_linesize   = s->linesize * 2;
2376
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2377 6867a90b Loic Le Loarer
        block_offset = &h->block_offset[24];
2378 1412060e Diego Biurrun
        if(mb_y&1){ //FIXME move out of this function?
2379 0da71265 Michael Niedermayer
            dest_y -= s->linesize*15;
2380 6867a90b Loic Le Loarer
            dest_cb-= s->uvlinesize*7;
2381
            dest_cr-= s->uvlinesize*7;
2382 0da71265 Michael Niedermayer
        }
2383 5d18eaad Loren Merritt
        if(FRAME_MBAFF) {
2384
            int list;
2385 3425501d Michael Niedermayer
            for(list=0; list<h->list_count; list++){
2386 5d18eaad Loren Merritt
                if(!USES_LIST(mb_type, list))
2387
                    continue;
2388
                if(IS_16X16(mb_type)){
2389
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
2390 1710856c Andreas Öman
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2391 5d18eaad Loren Merritt
                }else{
2392
                    for(i=0; i<16; i+=4){
2393
                        int ref = h->ref_cache[list][scan8[i]];
2394
                        if(ref >= 0)
2395 1710856c Andreas Öman
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2396 5d18eaad Loren Merritt
                    }
2397
                }
2398
            }
2399
        }
2400 0da71265 Michael Niedermayer
    } else {
2401 5d18eaad Loren Merritt
        linesize   = h->mb_linesize   = s->linesize;
2402
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2403 0da71265 Michael Niedermayer
//        dct_offset = s->linesize * 16;
2404
    }
2405 115329f1 Diego Biurrun
2406 bd91fee3 Alexander Strange
    if (!simple && IS_INTRA_PCM(mb_type)) {
2407 c1708e8d Michael Niedermayer
        for (i=0; i<16; i++) {
2408
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2409 6fbcaaa0 Loic Le Loarer
        }
2410 c1708e8d Michael Niedermayer
        for (i=0; i<8; i++) {
2411
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
2412
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
2413 6fbcaaa0 Loic Le Loarer
        }
2414 e7e09b49 Loic Le Loarer
    } else {
2415
        if(IS_INTRA(mb_type)){
2416 5f7f9719 Michael Niedermayer
            if(h->deblocking_filter)
2417 93cc10fa Andreas Öman
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2418 53c05b1e Michael Niedermayer
2419 87352549 Michael Niedermayer
            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2420 c92a30bb Kostya Shishkov
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2421
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2422 e7e09b49 Loic Le Loarer
            }
2423 0da71265 Michael Niedermayer
2424 e7e09b49 Loic Le Loarer
            if(IS_INTRA4x4(mb_type)){
2425 bd91fee3 Alexander Strange
                if(simple || !s->encoding){
2426 43efd19a Loren Merritt
                    if(IS_8x8DCT(mb_type)){
2427 1eb96035 Michael Niedermayer
                        if(transform_bypass){
2428
                            idct_dc_add =
2429
                            idct_add    = s->dsp.add_pixels8;
2430 dae006d7 Michael Niedermayer
                        }else{
2431 1eb96035 Michael Niedermayer
                            idct_dc_add = s->dsp.h264_idct8_dc_add;
2432
                            idct_add    = s->dsp.h264_idct8_add;
2433
                        }
2434 43efd19a Loren Merritt
                        for(i=0; i<16; i+=4){
2435
                            uint8_t * const ptr= dest_y + block_offset[i];
2436
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2437 41e4055b Michael Niedermayer
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2438
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2439
                            }else{
2440 ac0623b2 Michael Niedermayer
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
2441
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2442
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
2443
                                if(nnz){
2444
                                    if(nnz == 1 && h->mb[i*16])
2445
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
2446
                                    else
2447
                                        idct_add   (ptr, h->mb + i*16, linesize);
2448
                                }
2449 41e4055b Michael Niedermayer
                            }
2450 43efd19a Loren Merritt
                        }
2451 1eb96035 Michael Niedermayer
                    }else{
2452
                        if(transform_bypass){
2453
                            idct_dc_add =
2454
                            idct_add    = s->dsp.add_pixels4;
2455
                        }else{
2456
                            idct_dc_add = s->dsp.h264_idct_dc_add;
2457
                            idct_add    = s->dsp.h264_idct_add;
2458
                        }
2459 aebb5d6d Michael Niedermayer
                        for(i=0; i<16; i++){
2460
                            uint8_t * const ptr= dest_y + block_offset[i];
2461
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2462 e7e09b49 Loic Le Loarer
2463 aebb5d6d Michael Niedermayer
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2464
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2465
                            }else{
2466
                                uint8_t *topright;
2467
                                int nnz, tr;
2468
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2469
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2470
                                    assert(mb_y || linesize <= block_offset[i]);
2471
                                    if(!topright_avail){
2472
                                        tr= ptr[3 - linesize]*0x01010101;
2473
                                        topright= (uint8_t*) &tr;
2474
                                    }else
2475
                                        topright= ptr + 4 - linesize;
2476 ac0623b2 Michael Niedermayer
                                }else
2477 aebb5d6d Michael Niedermayer
                                    topright= NULL;
2478
2479
                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2480
                                nnz = h->non_zero_count_cache[ scan8[i] ];
2481
                                if(nnz){
2482
                                    if(is_h264){
2483
                                        if(nnz == 1 && h->mb[i*16])
2484
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
2485
                                        else
2486
                                            idct_add   (ptr, h->mb + i*16, linesize);
2487
                                    }else
2488
                                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2489
                                }
2490 ac0623b2 Michael Niedermayer
                            }
2491 41e4055b Michael Niedermayer
                        }
2492 8b82a956 Michael Niedermayer
                    }
2493 0da71265 Michael Niedermayer
                }
2494 e7e09b49 Loic Le Loarer
            }else{
2495 c92a30bb Kostya Shishkov
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2496 bd91fee3 Alexander Strange
                if(is_h264){
2497 36940eca Loren Merritt
                    if(!transform_bypass)
2498 93f0c0a4 Panagiotis Issaris
                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2499 36940eca Loren Merritt
                }else
2500 e7e09b49 Loic Le Loarer
                    svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2501 0da71265 Michael Niedermayer
            }
2502 5f7f9719 Michael Niedermayer
            if(h->deblocking_filter)
2503 93cc10fa Andreas Öman
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
2504 bd91fee3 Alexander Strange
        }else if(is_h264){
2505 e7e09b49 Loic Le Loarer
            hl_motion(h, dest_y, dest_cb, dest_cr,
2506 2833fc46 Loren Merritt
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2507
                      s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2508 e7e09b49 Loic Le Loarer
                      s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
2509 0da71265 Michael Niedermayer
        }
2510 e7e09b49 Loic Le Loarer
2511
2512
        if(!IS_INTRA4x4(mb_type)){
2513 bd91fee3 Alexander Strange
            if(is_h264){
2514 ef9d1d15 Loren Merritt
                if(IS_INTRA16x16(mb_type)){
2515 2fd1f0e0 Michael Niedermayer
                    if(transform_bypass){
2516
                        if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
2517 0a8ca22f Michael Niedermayer
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
2518
                        }else{
2519
                            for(i=0; i<16; i++){
2520
                                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
2521 1eb96035 Michael Niedermayer
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
2522 0a8ca22f Michael Niedermayer
                            }
2523 2fd1f0e0 Michael Niedermayer
                        }
2524
                    }else{
2525
                         s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2526 41e4055b Michael Niedermayer
                    }
2527 49c084a7 Michael Niedermayer
                }else if(h->cbp&15){
2528 2fd1f0e0 Michael Niedermayer
                    if(transform_bypass){
2529 0a8ca22f Michael Niedermayer
                        const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2530 1eb96035 Michael Niedermayer
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2531 0a8ca22f Michael Niedermayer
                        for(i=0; i<16; i+=di){
2532 62bc966f Michael Niedermayer
                            if(h->non_zero_count_cache[ scan8[i] ]){
2533 ef9d1d15 Loren Merritt
                                idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2534 0a8ca22f Michael Niedermayer
                            }
2535 ef9d1d15 Loren Merritt
                        }
2536 2fd1f0e0 Michael Niedermayer
                    }else{
2537
                        if(IS_8x8DCT(mb_type)){
2538
                            s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2539
                        }else{
2540
                            s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
2541