Statistics
| Branch: | Revision:

ffmpeg / libavcodec / h264.c @ 1968e438

History | View | Annotate | Download (311 KB)

1 0da71265 Michael Niedermayer
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8 0da71265 Michael Niedermayer
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
11 0da71265 Michael Niedermayer
 *
12 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
13 0da71265 Michael Niedermayer
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
19 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 0da71265 Michael Niedermayer
 */
21 115329f1 Diego Biurrun
22 0da71265 Michael Niedermayer
/**
23
 * @file h264.c
24
 * H.264 / AVC / MPEG4 part10 codec.
25
 * @author Michael Niedermayer <michaelni@gmx.at>
26
 */
27
28
#include "dsputil.h"
29
#include "avcodec.h"
30
#include "mpegvideo.h"
31 26b4fe82 Aurelien Jacobs
#include "h264.h"
32 0da71265 Michael Niedermayer
#include "h264data.h"
33 26b4fe82 Aurelien Jacobs
#include "h264_parser.h"
34 0da71265 Michael Niedermayer
#include "golomb.h"
35 626464fb Kostya Shishkov
#include "rectangle.h"
36 369122dd NVIDIA Corporation
#include "vdpau_internal.h"
37 0da71265 Michael Niedermayer
38 e5017ab8 Laurent Aimar
#include "cabac.h"
39 52cb7981 Jeff Downs
#ifdef ARCH_X86
40 a6493a8f Diego Biurrun
#include "x86/h264_i386.h"
41 52cb7981 Jeff Downs
#endif
42 e5017ab8 Laurent Aimar
43 2848ce84 Loren Merritt
//#undef NDEBUG
44 0da71265 Michael Niedermayer
#include <assert.h>
45
46 2ddcf84b Jeff Downs
/**
47
 * Value of Picture.reference when Picture is not a reference picture, but
48
 * is held for delayed output.
49
 */
50
#define DELAYED_PIC_REF 4
51
52 0da71265 Michael Niedermayer
static VLC coeff_token_vlc[4];
53 910e3668 Art Clarke
static VLC_TYPE coeff_token_vlc_tables[520+332+280+256][2];
54
static const int coeff_token_vlc_tables_size[4]={520,332,280,256};
55
56 0da71265 Michael Niedermayer
static VLC chroma_dc_coeff_token_vlc;
57 910e3668 Art Clarke
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2];
58
static const int chroma_dc_coeff_token_vlc_table_size = 256;
59 0da71265 Michael Niedermayer
60
static VLC total_zeros_vlc[15];
61 910e3668 Art Clarke
static VLC_TYPE total_zeros_vlc_tables[15][512][2];
62
static const int total_zeros_vlc_tables_size = 512;
63
64 0da71265 Michael Niedermayer
static VLC chroma_dc_total_zeros_vlc[3];
65 910e3668 Art Clarke
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2];
66
static const int chroma_dc_total_zeros_vlc_tables_size = 8;
67 0da71265 Michael Niedermayer
68
static VLC run_vlc[6];
69 910e3668 Art Clarke
static VLC_TYPE run_vlc_tables[6][8][2];
70
static const int run_vlc_tables_size = 8;
71
72 0da71265 Michael Niedermayer
static VLC run7_vlc;
73 910e3668 Art Clarke
static VLC_TYPE run7_vlc_table[96][2];
74
static const int run7_vlc_table_size = 96;
75 0da71265 Michael Niedermayer
76 8b82a956 Michael Niedermayer
static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
77
static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
78 6ba71fc4 Loïc Le Loarer
static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
79 3e20143e Loren Merritt
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
80 9c0e4624 Michael Niedermayer
static Picture * remove_long(H264Context *h, int i, int ref_mask);
81 8b82a956 Michael Niedermayer
82 849f1035 Måns Rullgård
/**
 * Packs the low 16 bits of two integers into one 32-bit word.
 *
 * Without WORDS_BIGENDIAN, a fills bits 0..15 and b fills bits 16..31;
 * with WORDS_BIGENDIAN the roles are swapped (presumably so that a is
 * always the first 16-bit element in memory order -- confirm against
 * the callers that store the result through a uint32_t pointer).
 *
 * The shift is performed on an unsigned 32-bit value: callers pass
 * reference indices that can be negative, and left-shifting a negative
 * signed int is undefined behaviour in C.
 *
 * @param a value whose low 16 bits are packed
 * @param b value whose low 16 bits are packed
 * @return the packed 32-bit word
 */
static av_always_inline uint32_t pack16to32(int a, int b){
#ifdef WORDS_BIGENDIAN
   return (b&0xFFFF) + ((uint32_t)a<<16);
#else
   return (a&0xFFFF) + ((uint32_t)b<<16);
#endif
}
89
90 d9ec210b Diego Pettenò
/* Lookup table: rem6[q] holds q % 6 for q = 0..51. */
static const uint8_t rem6[52]={
91 acd8d10f Panagiotis Issaris
0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
92
};
93
94 d9ec210b Diego Pettenò
/* Lookup table: div6[q] holds q / 6 (integer division) for q = 0..51. */
static const uint8_t div6[52]={
95 acd8d10f Panagiotis Issaris
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
96
};
97
98 143d7f14 Paul Kendall
/*
 * Index sets used by fill_caches() to pick data out of the left neighbour
 * macroblock(s). fill_caches() points left_block at one row:
 *   row 0       - default,
 *   row 3       - MBAFF, current MB is field coded and the left pair is not,
 *   row 2 or 1  - MBAFF, current MB is frame coded and the left pair is field
 *                 coded (row 2 for the top MB of the pair, row 1 for the bottom).
 * Entries 0..3 index intra4x4_pred_mode[] / non_zero_count[] and, multiplied
 * by b_stride, rows of motion_val[] / mvd_table[]; entries 4..7 index
 * non_zero_count[] only (presumably the chroma blocks -- TODO confirm).
 */
static const int left_block_options[4][8]={
99
    {0,1,2,3,7,10,8,11},
100
    {2,2,3,3,8,11,8,11},
101
    {0,0,1,1,7,10,7,10},
102
    {0,2,0,2,7,10,7,10}
103
};
104 acd8d10f Panagiotis Issaris
105 8140955d Michael Niedermayer
#define LEVEL_TAB_BITS 8
106
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
107
108 70abb407 Loren Merritt
/**
 * Loads the neighbour-dependent caches of the current macroblock (h->mb_xy).
 *
 * Locates the top-left, top, top-right and left neighbour macroblocks
 * (with extra address adjustments when FRAME_MBAFF is set), records
 * h->top_mb_xy / h->left_mb_xy[], and then fills, as applicable:
 *  - the *_samples_available masks and intra4x4_pred_mode_cache (intra MBs,
 *    only when !for_deblock),
 *  - non_zero_count_cache,
 *  - top_cbp / left_cbp (only when h->pps.cabac),
 *  - mv_cache / ref_cache, and with CABAC also mvd_cache and direct_cache
 *    (inter or direct MBs),
 *  - neighbor_transform_size.
 *
 * @param h           decoder context; caches inside it are overwritten
 * @param mb_type     type flags of the current macroblock
 * @param for_deblock non-zero when called on behalf of the loop filter:
 *                    a neighbour is then accepted whenever its slice_table
 *                    entry is < 0xFFFF (instead of having to equal
 *                    h->slice_num), the diagonal neighbours are ignored, and
 *                    the function returns early (after computing only top_xy)
 *                    when !FRAME_MBAFF and either h->slice_num == 1 or the top
 *                    MB has the same slice_table entry as the current MB.
 */
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
    MpegEncContext * const s = &h->s;
    const int mb_xy= h->mb_xy;
    int topleft_xy, top_xy, topright_xy, left_xy[2];
    int topleft_type, top_type, topright_type, left_type[2];
    const int * left_block;
    int topleft_partition= -1;
    int i;

    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);

    //FIXME deblocking could skip the intra and nnz parts.
    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
        return;

    /* Wow, what a mess, why didn't they simplify the interlacing & intra
     * stuff, I can't imagine that these complex rules are worth it. */

    topleft_xy = top_xy - 1;
    topright_xy= top_xy + 1;
    left_xy[1] = left_xy[0] = mb_xy-1;
    left_block = left_block_options[0];
    if(FRAME_MBAFF){
        /* Neighbour addresses depend on the field/frame coding of the
         * current MB and of the neighbouring MB pairs. */
        const int pair_xy          = s->mb_x     + (s->mb_y & ~1)*s->mb_stride;
        const int top_pair_xy      = pair_xy     - s->mb_stride;
        const int topleft_pair_xy  = top_pair_xy - 1;
        const int topright_pair_xy = top_pair_xy + 1;
        const int topleft_mb_field_flag  = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
        const int top_mb_field_flag      = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
        const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
        const int left_mb_field_flag     = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
        const int curr_mb_field_flag     = IS_INTERLACED(mb_type);
        const int bottom = (s->mb_y & 1);
        tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);

        if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
            top_xy -= s->mb_stride;
        }
        if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
            topleft_xy -= s->mb_stride;
        } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
            topleft_xy += s->mb_stride;
            // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
            topleft_partition = 0;
        }
        if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
            topright_xy -= s->mb_stride;
        }
        if (left_mb_field_flag != curr_mb_field_flag) {
            left_xy[1] = left_xy[0] = pair_xy - 1;
            if (curr_mb_field_flag) {
                left_xy[1] += s->mb_stride;
                left_block = left_block_options[3];
            } else {
                left_block= left_block_options[2 - bottom];
            }
        }
    }

    h->top_mb_xy = top_xy;
    h->left_mb_xy[0] = left_xy[0];
    h->left_mb_xy[1] = left_xy[1];
    if(for_deblock){
        topleft_type = 0;
        topright_type = 0;
        top_type     = h->slice_table[top_xy     ] < 0xFFFF ? s->current_picture.mb_type[top_xy]     : 0;
        left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;

        if(MB_MBAFF && !IS_INTRA(mb_type)){
            int list;
            for(list=0; list<h->list_count; list++){
                //These values were changed for ease of performing MC, we need to change them back
                //FIXME maybe we can make MC and loop filter use the same values or prevent
                //the MC code from changing ref_cache and rather use a temporary array.
                if(USES_LIST(mb_type,list)){
                    int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
                    *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                    ref += h->b8_stride;
                    *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
                    *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
                }
            }
        }
    }else{
        /* Normal decoding: only neighbours of the current slice count. */
        topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
        top_type     = h->slice_table[top_xy     ] == h->slice_num ? s->current_picture.mb_type[top_xy]     : 0;
        topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
        left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
        left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;

    /* NOTE: despite its indentation, this block is still inside the
     * !for_deblock branch opened above. */
    if(IS_INTRA(mb_type)){
        /* With constrained_intra_pred only intra neighbours may be used. */
        int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
        h->topleft_samples_available=
        h->top_samples_available=
        h->left_samples_available= 0xFFFF;
        h->topright_samples_available= 0xEEEA;

        if(!(top_type & type_mask)){
            h->topleft_samples_available= 0xB3FF;
            h->top_samples_available= 0x33FF;
            h->topright_samples_available= 0x26EA;
        }
        if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
            if(IS_INTERLACED(mb_type)){
                if(!(left_type[0] & type_mask)){
                    h->topleft_samples_available&= 0xDFFF;
                    h->left_samples_available&= 0x5FFF;
                }
                if(!(left_type[1] & type_mask)){
                    h->topleft_samples_available&= 0xFF5F;
                    h->left_samples_available&= 0xFF5F;
                }
            }else{
                int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
                                ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
                assert(left_xy[0] == left_xy[1]);
                if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
                    h->topleft_samples_available&= 0xDF5F;
                    h->left_samples_available&= 0x5F5F;
                }
            }
        }else{
            if(!(left_type[0] & type_mask)){
                h->topleft_samples_available&= 0xDF5F;
                h->left_samples_available&= 0x5F5F;
            }
        }

        if(!(topleft_type & type_mask))
            h->topleft_samples_available&= 0x7FFF;

        if(!(topright_type & type_mask))
            h->topright_samples_available&= 0xFBFF;

        if(IS_INTRA4x4(mb_type)){
            if(IS_INTRA4x4(top_type)){
                h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
                h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
                h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
                h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
            }else{
                /* -1 when the top MB is not usable, 2 otherwise. */
                int pred;
                if(!(top_type & type_mask))
                    pred= -1;
                else{
                    pred= 2;
                }
                h->intra4x4_pred_mode_cache[4+8*0]=
                h->intra4x4_pred_mode_cache[5+8*0]=
                h->intra4x4_pred_mode_cache[6+8*0]=
                h->intra4x4_pred_mode_cache[7+8*0]= pred;
            }
            for(i=0; i<2; i++){
                if(IS_INTRA4x4(left_type[i])){
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
                }else{
                    int pred;
                    if(!(left_type[i] & type_mask))
                        pred= -1;
                    else{
                        pred= 2;
                    }
                    h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
                    h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
                }
            }
        }
    }
    } /* end of the !for_deblock branch */


/*
0 . T T. T T T T
1 L . .L . . . .
2 L . .L . . . .
3 . T TL . . . .
4 L . .L . . . .
5 L . .. . . . .
*/
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
    if(top_type){
        h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
        h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
        h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
        h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];

        h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
        h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];

        h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
        h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];

    }else{
        /* Top MB unavailable: 0 for non-intra MBs under CABAC, else 64. */
        h->non_zero_count_cache[4+8*0]=
        h->non_zero_count_cache[5+8*0]=
        h->non_zero_count_cache[6+8*0]=
        h->non_zero_count_cache[7+8*0]=

        h->non_zero_count_cache[1+8*0]=
        h->non_zero_count_cache[2+8*0]=

        h->non_zero_count_cache[1+8*3]=
        h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;

    }

    for (i=0; i<2; i++) {
        if(left_type[i]){
            h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
            h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
            h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
            h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
        }else{
            h->non_zero_count_cache[3+8*1 + 2*8*i]=
            h->non_zero_count_cache[3+8*2 + 2*8*i]=
            h->non_zero_count_cache[0+8*1 +   8*i]=
            h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
        }
    }

    if( h->pps.cabac ) {
        // top_cbp
        if(top_type) {
            h->top_cbp = h->cbp_table[top_xy];
        } else if(IS_INTRA(mb_type)) {
            h->top_cbp = 0x1C0;
        } else {
            h->top_cbp = 0;
        }
        // left_cbp
        if (left_type[0]) {
            h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
        } else if(IS_INTRA(mb_type)) {
            h->left_cbp = 0x1C0;
        } else {
            h->left_cbp = 0;
        }
        if (left_type[0]) {
            h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
        }
        if (left_type[1]) {
            h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
        }
    }

#if 1
    if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
        int list;
        for(list=0; list<h->list_count; list++){
            if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
                /*if(!h->mv_cache_clean[list]){
                    memset(h->mv_cache [list],  0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
                    memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
                    h->mv_cache_clean[list]= 1;
                }*/
                continue;
            }
            h->mv_cache_clean[list]= 0;

            /* Top neighbour: bottom row of its motion vectors / ref indices. */
            if(USES_LIST(top_type, list)){
                const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
                *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
                h->ref_cache[list][scan8[0] + 0 - 1*8]=
                h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
                h->ref_cache[list][scan8[0] + 2 - 1*8]=
                h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
            }

            /* Left neighbour(s): right column, rows chosen via left_block. */
            for(i=0; i<2; i++){
                int cache_idx = scan8[0] - 1 + i*2*8;
                if(USES_LIST(left_type[i], list)){
                    const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
                    const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
                    *(uint32_t*)h->mv_cache[list][cache_idx  ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
                    *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
                    h->ref_cache[list][cache_idx  ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
                    h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
                }else{
                    *(uint32_t*)h->mv_cache [list][cache_idx  ]=
                    *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
                    h->ref_cache[list][cache_idx  ]=
                    h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
                }
            }

            if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
                continue;

            if(USES_LIST(topleft_type, list)){
                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if(USES_LIST(topright_type, list)){
                const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
                *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
            }else{
                *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
            }

            if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
                continue;

            h->ref_cache[list][scan8[5 ]+1] =
            h->ref_cache[list][scan8[7 ]+1] =
            h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
            h->ref_cache[list][scan8[4 ]] =
            h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
            *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
            *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
            *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
            *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;

            if( h->pps.cabac ) {
                /* XXX beurk, Load mvd */
                if(USES_LIST(top_type, list)){
                    const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                }
                if(USES_LIST(left_type[0], list)){
                    const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
                }
                if(USES_LIST(left_type[1], list)){
                    const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
                    *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
                }else{
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
                    *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
                }
                *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
                *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
                *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
                *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;

                if(h->slice_type_nos == FF_B_TYPE){
                    fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);

                    if(IS_DIRECT(top_type)){
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
                    }else if(IS_8X8(top_type)){
                        int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
                        h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
                        h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
                    }else{
                        *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                    }

                    if(IS_DIRECT(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= 1;
                    else if(IS_8X8(left_type[0]))
                        h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 0*8]= 0;

                    if(IS_DIRECT(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= 1;
                    else if(IS_8X8(left_type[1]))
                        h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
                    else
                        h->direct_cache[scan8[0] - 1 + 2*8]= 0;
                }
            }

            if(FRAME_MBAFF){
                /* Rescale cached neighbour data whose field/frame coding
                 * differs from that of the current MB. */
#define MAP_MVS\
                    MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
                    MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
                    MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
                    MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
                    MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
                    MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
                if(MB_FIELD){
#define MAP_F2F(idx, mb_type)\
                    if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] <<= 1;\
                        h->mv_cache[list][idx][1] /= 2;\
                        h->mvd_cache[list][idx][1] /= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }else{
                    /* "*= 2" rather than "<<= 1": the vertical components
                     * are signed and may be negative, and left-shifting a
                     * negative value is undefined behaviour. */
#define MAP_F2F(idx, mb_type)\
                    if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
                        h->ref_cache[list][idx] >>= 1;\
                        h->mv_cache[list][idx][1] *= 2;\
                        h->mvd_cache[list][idx][1] *= 2;\
                    }
                    MAP_MVS
#undef MAP_F2F
                }
            }
        }
    }
#endif

    h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
}
546
547
/**
 * Copies seven intra4x4 prediction modes of the current macroblock from the
 * per-macroblock cache into the per-picture table h->intra4x4_pred_mode.
 * Entries 0..3 come from cache column 7 (rows 1..4), entries 4..6 from cache
 * row 4 (columns 4..6).
 * NOTE(review): presumably these are the right column and bottom row of the
 * macroblock's 4x4 block grid, i.e. what later neighbours need - confirm
 * against the scan8 layout.
 */
static inline void write_back_intra_pred_mode(H264Context *h){
    const int mb_xy= h->mb_xy;
    int k;

    /* cache column 7, rows 1..4 -> table entries 0..3 */
    for(k=0; k<4; k++)
        h->intra4x4_pred_mode[mb_xy][k]= h->intra4x4_pred_mode_cache[7+8*(k+1)];

    /* cache row 4, columns 4..6 -> table entries 4..6 */
    for(k=0; k<3; k++)
        h->intra4x4_pred_mode[mb_xy][4+k]= h->intra4x4_pred_mode_cache[(4+k)+8*4];
}
558
559
/**
560
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
561
 */
562
static inline int check_intra4x4_pred_mode(H264Context *h){
563
    MpegEncContext * const s = &h->s;
564
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
565
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
566
    int i;
567 115329f1 Diego Biurrun
568 0da71265 Michael Niedermayer
    if(!(h->top_samples_available&0x8000)){
569
        for(i=0; i<4; i++){
570
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
571
            if(status<0){
572 9b879566 Michel Bardiaux
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
573 0da71265 Michael Niedermayer
                return -1;
574
            } else if(status){
575
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
576
            }
577
        }
578
    }
579 115329f1 Diego Biurrun
580 d1d10e91 Michael Niedermayer
    if((h->left_samples_available&0x8888)!=0x8888){
581
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
582 0da71265 Michael Niedermayer
        for(i=0; i<4; i++){
583 d1d10e91 Michael Niedermayer
            if(!(h->left_samples_available&mask[i])){
584 26695973 Michael Niedermayer
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
585
                if(status<0){
586
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
587
                    return -1;
588
                } else if(status){
589
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
590
                }
591 d1d10e91 Michael Niedermayer
            }
592 0da71265 Michael Niedermayer
        }
593
    }
594
595
    return 0;
596
} //FIXME cleanup like next
597
598
/**
599
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
600
 */
601
static inline int check_intra_pred_mode(H264Context *h, int mode){
602
    MpegEncContext * const s = &h->s;
603
    static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
604
    static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
605 115329f1 Diego Biurrun
606 43ff0714 Michael Niedermayer
    if(mode > 6U) {
607 5175b937 Loic Le Loarer
        av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
608 7440fe83 Michael Niedermayer
        return -1;
609 5175b937 Loic Le Loarer
    }
610 115329f1 Diego Biurrun
611 0da71265 Michael Niedermayer
    if(!(h->top_samples_available&0x8000)){
612
        mode= top[ mode ];
613
        if(mode<0){
614 9b879566 Michel Bardiaux
            av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
615 0da71265 Michael Niedermayer
            return -1;
616
        }
617
    }
618 115329f1 Diego Biurrun
619 d1d10e91 Michael Niedermayer
    if((h->left_samples_available&0x8080) != 0x8080){
620 0da71265 Michael Niedermayer
        mode= left[ mode ];
621 d1d10e91 Michael Niedermayer
        if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
622
            mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
623
        }
624 0da71265 Michael Niedermayer
        if(mode<0){
625 9b879566 Michel Bardiaux
            av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
626 0da71265 Michael Niedermayer
            return -1;
627 115329f1 Diego Biurrun
        }
628 0da71265 Michael Niedermayer
    }
629
630
    return mode;
631
}
632
633
/**
634
 * gets the predicted intra4x4 prediction mode.
635
 */
636
static inline int pred_intra_mode(H264Context *h, int n){
637
    const int index8= scan8[n];
638
    const int left= h->intra4x4_pred_mode_cache[index8 - 1];
639
    const int top = h->intra4x4_pred_mode_cache[index8 - 8];
640
    const int min= FFMIN(left, top);
641
642 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 0da71265 Michael Niedermayer
644
    if(min<0) return DC_PRED;
645
    else      return min;
646
}
647
648
/**
 * Copies thirteen entries of the per-macroblock non-zero-count cache into the
 * per-picture table h->non_zero_count for the current macroblock.
 * Entries 0..6 come from cache column 7 (rows 1..4) and cache row 4
 * (columns 4..6); entries 7..9 and 10..12 come from two 3-entry groups
 * located three cache rows apart.
 * NOTE(review): presumably the first seven are luma and the two groups are
 * the chroma planes - confirm against the scan8 layout.
 */
static inline void write_back_non_zero_count(H264Context *h){
    const int mb_xy= h->mb_xy;
    int k, grp;

    /* cache column 7, rows 1..4 -> entries 0..3 */
    for(k=0; k<4; k++)
        h->non_zero_count[mb_xy][k]= h->non_zero_count_cache[7+8*(k+1)];

    /* cache row 4, columns 4..6 -> entries 4..6 */
    for(k=0; k<3; k++)
        h->non_zero_count[mb_xy][4+k]= h->non_zero_count_cache[(4+k)+8*4];

    /* two groups of three entries, the second one three cache rows lower */
    for(grp=0; grp<2; grp++){
        const int row = 3*grp;

        h->non_zero_count[mb_xy][9+row]= h->non_zero_count_cache[1+8*(2+row)];
        h->non_zero_count[mb_xy][8+row]= h->non_zero_count_cache[2+8*(2+row)];
        h->non_zero_count[mb_xy][7+row]= h->non_zero_count_cache[2+8*(1+row)];
    }
}
667
668
/**
669 1412060e Diego Biurrun
 * gets the predicted number of non-zero coefficients.
670 0da71265 Michael Niedermayer
 * @param n block index
671
 */
672
static inline int pred_non_zero_count(H264Context *h, int n){
673
    const int index8= scan8[n];
674
    const int left= h->non_zero_count_cache[index8 - 1];
675
    const int top = h->non_zero_count_cache[index8 - 8];
676
    int i= left + top;
677 115329f1 Diego Biurrun
678 0da71265 Michael Niedermayer
    if(i<64) i= (i+1)>>1;
679
680 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
681 0da71265 Michael Niedermayer
682
    return i&31;
683
}
684
685 1924f3ce Michael Niedermayer
/**
 * Fetches the diagonal neighbour motion vector of a partition: the top-right
 * cache entry if its reference is available, otherwise the top-left one.
 * With FRAME_MBAFF some neighbours are read from the picture's motion_val /
 * ref_index arrays instead, with the vertical MV component and the reference
 * index rescaled, and stored in the scratch cache slot scan8[0]-2.
 *
 * @param C          receives a pointer to the selected motion vector
 * @param i          cache index of the partition
 * @param list       reference list
 * @param part_width partition width in cache columns
 * @return the reference value that goes with *C (a value read from ref_cache
 *         or ref_index, or LIST_NOT_USED)
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    MpegEncContext *s = &h->s;
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];

/* Loads the MV at 4x4 block position (X4,Y4) of the current picture into the
 * scratch cache slot, applies MV_OP to its vertical component and returns the
 * matching reference index with REF_OP applied; returns LIST_NOT_USED if the
 * macroblock at that position does not use this list. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int blk_x = X4, blk_y = Y4;\
                const int nb_type = mb_types[(blk_x>>2)+(blk_y>>2)*s->mb_stride];\
                if(!USES_LIST(nb_type,list))\
                    return LIST_NOT_USED;\
                src_mv = s->current_picture_ptr->motion_val[list][blk_x + blk_y*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = src_mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = src_mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(blk_x>>1) + (blk_y>>1)*h->b8_stride] REF_OP;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *src_mv;

        /* zero the scratch slot and point *C at it by default */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            const int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);

            if(IS_INTERLACED(mb_types[topright_xy])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }

        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            const int left_interlaced = IS_INTERLACED(mb_types[h->left_mb_xy[0]]);

            if(!MB_FIELD){
                if(left_interlaced){
                    SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
                }
            }else if(!left_interlaced && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
    }
#undef SET_DIAG_MV

    if(topright_ref == PART_NOT_AVAILABLE){
        tprintf(s->avctx, "topright MV not available\n");

        /* fall back to the top-left neighbour */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }

    *C= h->mv_cache[list][ i - 8 + part_width ];
    return topright_ref;
}
741
742 0da71265 Michael Niedermayer
/**
743
 * gets the predicted MV.
744
 * @param n the block index
745
 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
746
 * @param mx the x component of the predicted motion vector
747
 * @param my the y component of the predicted motion vector
748
 */
749
static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
750
    const int index8= scan8[n];
751
    const int top_ref=      h->ref_cache[list][ index8 - 8 ];
752
    const int left_ref=     h->ref_cache[list][ index8 - 1 ];
753
    const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
754
    const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
755 1924f3ce Michael Niedermayer
    const int16_t * C;
756
    int diagonal_ref, match_count;
757
758 0da71265 Michael Niedermayer
    assert(part_width==1 || part_width==2 || part_width==4);
759 1924f3ce Michael Niedermayer
760 0da71265 Michael Niedermayer
/* mv_cache
761 115329f1 Diego Biurrun
  B . . A T T T T
762 0da71265 Michael Niedermayer
  U . . L . . , .
763
  U . . L . . . .
764
  U . . L . . , .
765
  . . . L . . . .
766
*/
767 1924f3ce Michael Niedermayer
768
    diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
769
    match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
770 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
771 1924f3ce Michael Niedermayer
    if(match_count > 1){ //most common
772
        *mx= mid_pred(A[0], B[0], C[0]);
773
        *my= mid_pred(A[1], B[1], C[1]);
774
    }else if(match_count==1){
775
        if(left_ref==ref){
776
            *mx= A[0];
777 115329f1 Diego Biurrun
            *my= A[1];
778 1924f3ce Michael Niedermayer
        }else if(top_ref==ref){
779
            *mx= B[0];
780 115329f1 Diego Biurrun
            *my= B[1];
781 0da71265 Michael Niedermayer
        }else{
782 1924f3ce Michael Niedermayer
            *mx= C[0];
783 115329f1 Diego Biurrun
            *my= C[1];
784 0da71265 Michael Niedermayer
        }
785
    }else{
786 1924f3ce Michael Niedermayer
        if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
787 0da71265 Michael Niedermayer
            *mx= A[0];
788 115329f1 Diego Biurrun
            *my= A[1];
789 0da71265 Michael Niedermayer
        }else{
790 1924f3ce Michael Niedermayer
            *mx= mid_pred(A[0], B[0], C[0]);
791
            *my= mid_pred(A[1], B[1], C[1]);
792 0da71265 Michael Niedermayer
        }
793
    }
794 115329f1 Diego Biurrun
795 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
796 0da71265 Michael Niedermayer
}
797
798
/**
799
 * gets the directionally predicted 16x8 MV.
800
 * @param n the block index
801
 * @param mx the x component of the predicted motion vector
802
 * @param my the y component of the predicted motion vector
803
 */
804
static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
805
    if(n==0){
806
        const int top_ref=      h->ref_cache[list][ scan8[0] - 8 ];
807
        const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
808
809 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
810 115329f1 Diego Biurrun
811 0da71265 Michael Niedermayer
        if(top_ref == ref){
812
            *mx= B[0];
813
            *my= B[1];
814
            return;
815
        }
816
    }else{
817
        const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
818
        const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
819 115329f1 Diego Biurrun
820 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
821 0da71265 Michael Niedermayer
822
        if(left_ref == ref){
823
            *mx= A[0];
824
            *my= A[1];
825
            return;
826
        }
827
    }
828
829
    //RARE
830
    pred_motion(h, n, 4, list, ref, mx, my);
831
}
832
833
/**
834
 * gets the directionally predicted 8x16 MV.
835
 * @param n the block index
836
 * @param mx the x component of the predicted motion vector
837
 * @param my the y component of the predicted motion vector
838
 */
839
static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
840
    if(n==0){
841
        const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
842
        const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
843 115329f1 Diego Biurrun
844 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
845 0da71265 Michael Niedermayer
846
        if(left_ref == ref){
847
            *mx= A[0];
848
            *my= A[1];
849
            return;
850
        }
851
    }else{
852 1924f3ce Michael Niedermayer
        const int16_t * C;
853
        int diagonal_ref;
854
855
        diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
856 115329f1 Diego Biurrun
857 a9c9a240 Michel Bardiaux
        tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
858 0da71265 Michael Niedermayer
859 115329f1 Diego Biurrun
        if(diagonal_ref == ref){
860 0da71265 Michael Niedermayer
            *mx= C[0];
861
            *my= C[1];
862
            return;
863
        }
864
    }
865
866
    //RARE
867
    pred_motion(h, n, 2, list, ref, mx, my);
868
}
869
870
/**
 * Computes the motion vector used for a skipped macroblock in list 0.
 * The result is (0,0) if the top or the left neighbour is not available, or
 * if either of them has reference 0 together with an all-zero motion vector;
 * otherwise it is the regular pred_motion() prediction for reference 0.
 * @param mx the x component of the predicted motion vector
 * @param my the y component of the predicted motion vector
 */
static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
    const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
    const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];

    tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);

    /* ref | packed-mv is zero only when both the reference and both MV
     * components are zero */
    if(   top_ref != PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE
       && ( top_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ])
       && (left_ref | *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ])){
        pred_motion(h, 0, 4, 0, 0, mx, my);
    }else{
        *my = 0;
        *mx = 0;
    }
}
888
889 8b1fd554 Michael Niedermayer
/**
 * Computes the distance scale factor for list-0 reference i.
 * @param poc  POC of the current picture/field
 * @param poc1 POC of the first list-1 reference
 * @param i    index into h->ref_list[0]
 * @return 256 if the reference is a long-term one or the clipped POC distance
 *         poc1-poc0 is zero, otherwise the scaled ratio clipped to -1024..1023
 */
static int get_scale_factor(H264Context * const h, int poc, int poc1, int i){
    const int poc0 = h->ref_list[0][i].poc;
    const int td   = av_clip(poc1 - poc0, -128, 127);
    int tb, tx;

    /* the td == 0 test also protects the division below */
    if(td == 0 || h->ref_list[0][i].long_ref)
        return 256;

    tb = av_clip(poc - poc0, -128, 127);
    tx = (16384 + (FFABS(td) >> 1)) / td;

    return av_clip((tb*tx + 32) >> 6, -1024, 1023);
}
900
901 5ad984c9 Loren Merritt
/**
 * Fills h->dist_scale_factor_field (per field, from ref_list[0] entries 16 and
 * up) and h->dist_scale_factor (from ref_list[0] entries 0 and up) with the
 * values returned by get_scale_factor().
 */
static inline void direct_dist_scale_factor(H264Context * const h){
    MpegEncContext * const s = &h->s;
    const int cur_poc = h->s.current_picture_ptr->field_poc[ s->picture_structure == PICT_BOTTOM_FIELD ];
    const int col_poc = h->ref_list[1][0].poc;
    int i, field;

    /* per-field tables use the field POCs of the current picture and of the
     * first list-1 reference */
    for(field=0; field<2; field++){
        const int cur_field_poc = h->s.current_picture_ptr->field_poc[field];
        const int col_field_poc = h->ref_list[1][0].field_poc[field];

        for(i=0; i < 2*h->ref_count[0]; i++){
            h->dist_scale_factor_field[field][i^field] =
                get_scale_factor(h, cur_field_poc, col_field_poc, i+16);
        }
    }

    for(i=0; i<h->ref_count[0]; i++)
        h->dist_scale_factor[i] = get_scale_factor(h, cur_poc, col_poc, i);
}
917 f4d3382d Michael Niedermayer
918
/**
 * Builds, for one list, the table that maps reference indices of the first
 * list-1 picture (the colocated picture) onto indices of the current
 * reference list. Entries are matched by comparing the stored ref_poc value
 * with 4*frame_num + (reference&3) of each current reference.
 *
 * @param map      output; map[list] is cleared and then filled
 * @param list     reference list to process
 * @param field    field/parity index of the current picture
 * @param colfield field/parity index used to read ref1's stored ref lists
 * @param mbafi    if non-zero, search ref_list entries 16.. (2 per reference)
 *                 and store (j-16)^field instead of j
 */
static void fill_colmap(H264Context *h, int map[2][16+32], int list, int field, int colfield, int mbafi){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    int rfield, old_ref, j;
    int first = mbafi ? 16                      : 0;
    int last  = mbafi ? 16+2*h->ref_count[list] : h->ref_count[list];
    int interl= mbafi || s->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for(rfield=0; rfield<2; rfield++){
        for(old_ref=0; old_ref<ref1->ref_count[colfield][list]; old_ref++){
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if(!interl){
                poc |= 3;
            }else if((poc&3) == 3){ //FIXME store all MBAFF references so this isnt needed
                poc= (poc&~3) + rfield + 1;
            }

            /* first current reference with a matching key wins */
            for(j=first; j<last; j++){
                int cur_ref;

                if(4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3) != poc)
                    continue;

                cur_ref= mbafi ? (j-16)^field : j;
                map[list][2*old_ref + (rfield^field) + 16] = cur_ref;
                if(rfield == field)
                    map[list][old_ref] = cur_ref;
                break;
            }
        }
    }
}
950
951 2f944356 Loren Merritt
/**
 * Records the current reference lists in the current picture (ref_count and
 * ref_poc, keyed as 4*frame_num + (reference&3)) and stores the MBAFF flag.
 * For B pictures that do not use spatial direct prediction it additionally
 * builds the colocated-to-list0 maps through fill_colmap().
 */
static inline void direct_ref_list_init(H264Context * const h){
    MpegEncContext * const s = &h->s;
    Picture * const ref1 = &h->ref_list[1][0];
    Picture * const cur = s->current_picture_ptr;
    const int sidx    = (s->picture_structure&1)^1;
    const int ref1sidx= (ref1->reference&1)^1;
    int list, j, field;

    for(list=0; list<2; list++){
        cur->ref_count[sidx][list] = h->ref_count[list];
        for(j=0; j<h->ref_count[list]; j++){
            cur->ref_poc[sidx][list][j] = 4*h->ref_list[list][j].frame_num + (h->ref_list[list][j].reference&3);
        }
    }

    /* for a frame picture, slot 1 is a copy of slot 0 */
    if(s->picture_structure == PICT_FRAME){
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc  [1], cur->ref_poc  [0], sizeof(cur->ref_poc  [0]));
    }

    cur->mbaff= FRAME_MBAFF;

    /* the maps are only built for B pictures without spatial direct prediction */
    if(cur->pict_type == FF_B_TYPE && !h->direct_spatial_mv_pred){
        for(list=0; list<2; list++){
            fill_colmap(h, h->map_col_to_list0, list, sidx, ref1sidx, 0);
            for(field=0; field<2; field++)
                fill_colmap(h, h->map_col_to_list0_field[field], list, field, field, 1);
        }
    }
}
981 5ad984c9 Loren Merritt
982
static inline void pred_direct_motion(H264Context * const h, int *mb_type){
983
    MpegEncContext * const s = &h->s;
984 d00eac6c Michael Niedermayer
    int b8_stride = h->b8_stride;
985
    int b4_stride = h->b_stride;
986
    int mb_xy = h->mb_xy;
987
    int mb_type_col[2];
988
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
989
    const int8_t *l1ref0, *l1ref1;
990 5ad984c9 Loren Merritt
    const int is_b8x8 = IS_8X8(*mb_type);
991 88e7a4d1 Michael Niedermayer
    unsigned int sub_mb_type;
992 5ad984c9 Loren Merritt
    int i8, i4;
993
994 5d18eaad Loren Merritt
#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
995 d00eac6c Michael Niedermayer
996
    if(IS_INTERLACED(h->ref_list[1][0].mb_type[mb_xy])){ // AFL/AFR/FR/FL -> AFL/FL
997 53c193a9 Michael Niedermayer
        if(!IS_INTERLACED(*mb_type)){                    //     AFR/FR    -> AFL/FL
998 471341a7 Michael Niedermayer
            int cur_poc = s->current_picture_ptr->poc;
999
            int *col_poc = h->ref_list[1]->field_poc;
1000
            int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1001
            mb_xy= s->mb_x + ((s->mb_y&~1) + col_parity)*s->mb_stride;
1002
            b8_stride = 0;
1003 60c9b24d Michael Niedermayer
        }else if(!(s->picture_structure & h->ref_list[1][0].reference) && !h->ref_list[1][0].mbaff){// FL -> FL & differ parity
1004 d00eac6c Michael Niedermayer
            int fieldoff= 2*(h->ref_list[1][0].reference)-3;
1005
            mb_xy += s->mb_stride*fieldoff;
1006
        }
1007
        goto single_col;
1008
    }else{                                               // AFL/AFR/FR/FL -> AFR/FR
1009
        if(IS_INTERLACED(*mb_type)){                     // AFL       /FL -> AFR/FR
1010
            mb_xy= s->mb_x + (s->mb_y&~1)*s->mb_stride;
1011
            mb_type_col[0] = h->ref_list[1][0].mb_type[mb_xy];
1012
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy + s->mb_stride];
1013
            b8_stride *= 3;
1014
            b4_stride *= 6;
1015
            //FIXME IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag
1016
            if(    (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)
1017
                && (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA)
1018
                && !is_b8x8){
1019
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1020
                *mb_type   |= MB_TYPE_16x8 |MB_TYPE_L0L1|MB_TYPE_DIRECT2; /* B_16x8 */
1021
            }else{
1022
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1023
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1024
            }
1025
        }else{                                           //     AFR/FR    -> AFR/FR
1026
single_col:
1027
            mb_type_col[0] =
1028
            mb_type_col[1] = h->ref_list[1][0].mb_type[mb_xy];
1029 cc615d2c Michael Niedermayer
            if(IS_8X8(mb_type_col[0]) && !h->sps.direct_8x8_inference_flag){
1030
                /* FIXME save sub mb types from previous frames (or derive from MVs)
1031
                * so we know exactly what block size to use */
1032
                sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
1033
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1034
            }else if(!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)){
1035
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1036
                *mb_type   |= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
1037
            }else{
1038
                sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
1039
                *mb_type   |= MB_TYPE_8x8|MB_TYPE_L0L1;
1040
            }
1041 d00eac6c Michael Niedermayer
        }
1042 5ad984c9 Loren Merritt
    }
1043
1044 7d54ecc9 Michael Niedermayer
    l1mv0  = &h->ref_list[1][0].motion_val[0][h->mb2b_xy [mb_xy]];
1045
    l1mv1  = &h->ref_list[1][0].motion_val[1][h->mb2b_xy [mb_xy]];
1046
    l1ref0 = &h->ref_list[1][0].ref_index [0][h->mb2b8_xy[mb_xy]];
1047
    l1ref1 = &h->ref_list[1][0].ref_index [1][h->mb2b8_xy[mb_xy]];
1048 9b5fab91 Michael Niedermayer
    if(!b8_stride){
1049
        if(s->mb_y&1){
1050
            l1ref0 += h->b8_stride;
1051
            l1ref1 += h->b8_stride;
1052
            l1mv0  +=  2*b4_stride;
1053
            l1mv1  +=  2*b4_stride;
1054
        }
1055 d00eac6c Michael Niedermayer
    }
1056 115329f1 Diego Biurrun
1057 5ad984c9 Loren Merritt
    if(h->direct_spatial_mv_pred){
1058
        int ref[2];
1059
        int mv[2][2];
1060
        int list;
1061
1062 5d18eaad Loren Merritt
        /* FIXME interlacing + spatial direct uses wrong colocated block positions */
1063
1064 5ad984c9 Loren Merritt
        /* ref = min(neighbors) */
1065
        for(list=0; list<2; list++){
1066
            int refa = h->ref_cache[list][scan8[0] - 1];
1067
            int refb = h->ref_cache[list][scan8[0] - 8];
1068
            int refc = h->ref_cache[list][scan8[0] - 8 + 4];
1069 9bec77fe Paul Kendall
            if(refc == PART_NOT_AVAILABLE)
1070 5ad984c9 Loren Merritt
                refc = h->ref_cache[list][scan8[0] - 8 - 1];
1071 29d05ebc Michael Niedermayer
            ref[list] = FFMIN3((unsigned)refa, (unsigned)refb, (unsigned)refc);
1072 5ad984c9 Loren Merritt
            if(ref[list] < 0)
1073
                ref[list] = -1;
1074
        }
1075
1076
        if(ref[0] < 0 && ref[1] < 0){
1077
            ref[0] = ref[1] = 0;
1078
            mv[0][0] = mv[0][1] =
1079
            mv[1][0] = mv[1][1] = 0;
1080
        }else{
1081
            for(list=0; list<2; list++){
1082
                if(ref[list] >= 0)
1083
                    pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1084
                else
1085
                    mv[list][0] = mv[list][1] = 0;
1086
            }
1087
        }
1088
1089
        if(ref[1] < 0){
1090 50b3ab0f Loren Merritt
            if(!is_b8x8)
1091
                *mb_type &= ~MB_TYPE_L1;
1092
            sub_mb_type &= ~MB_TYPE_L1;
1093 5ad984c9 Loren Merritt
        }else if(ref[0] < 0){
1094 50b3ab0f Loren Merritt
            if(!is_b8x8)
1095
                *mb_type &= ~MB_TYPE_L0;
1096
            sub_mb_type &= ~MB_TYPE_L0;
1097 5ad984c9 Loren Merritt
        }
1098
1099 d00eac6c Michael Niedermayer
        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1100 50b3ab0f Loren Merritt
            for(i8=0; i8<4; i8++){
1101
                int x8 = i8&1;
1102
                int y8 = i8>>1;
1103
                int xy8 = x8+y8*b8_stride;
1104
                int xy4 = 3*x8+y8*b4_stride;
1105
                int a=0, b=0;
1106
1107
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1108
                    continue;
1109
                h->sub_mb_type[i8] = sub_mb_type;
1110
1111
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1112
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1113 d00eac6c Michael Niedermayer
                if(!IS_INTRA(mb_type_col[y8])
1114 50b3ab0f Loren Merritt
                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1115
                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1116
                    if(ref[0] > 0)
1117
                        a= pack16to32(mv[0][0],mv[0][1]);
1118
                    if(ref[1] > 0)
1119
                        b= pack16to32(mv[1][0],mv[1][1]);
1120
                }else{
1121
                    a= pack16to32(mv[0][0],mv[0][1]);
1122
                    b= pack16to32(mv[1][0],mv[1][1]);
1123
                }
1124
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1125
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
1126
            }
1127
        }else if(IS_16X16(*mb_type)){
1128 d19f5acb Michael Niedermayer
            int a=0, b=0;
1129
1130 cec93959 Loren Merritt
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1131
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1132 d00eac6c Michael Niedermayer
            if(!IS_INTRA(mb_type_col[0])
1133 c26abfa5 Diego Biurrun
               && (   (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1134
                   || (l1ref0[0]  < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1135 bf4e3bd2 Måns Rullgård
                       && (h->x264_build>33 || !h->x264_build)))){
1136 5ad984c9 Loren Merritt
                if(ref[0] > 0)
1137 d19f5acb Michael Niedermayer
                    a= pack16to32(mv[0][0],mv[0][1]);
1138 5ad984c9 Loren Merritt
                if(ref[1] > 0)
1139 d19f5acb Michael Niedermayer
                    b= pack16to32(mv[1][0],mv[1][1]);
1140 5ad984c9 Loren Merritt
            }else{
1141 d19f5acb Michael Niedermayer
                a= pack16to32(mv[0][0],mv[0][1]);
1142
                b= pack16to32(mv[1][0],mv[1][1]);
1143 5ad984c9 Loren Merritt
            }
1144 d19f5acb Michael Niedermayer
            fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1145
            fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
1146 5ad984c9 Loren Merritt
        }else{
1147
            for(i8=0; i8<4; i8++){
1148
                const int x8 = i8&1;
1149
                const int y8 = i8>>1;
1150 115329f1 Diego Biurrun
1151 5ad984c9 Loren Merritt
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1152
                    continue;
1153
                h->sub_mb_type[i8] = sub_mb_type;
1154 115329f1 Diego Biurrun
1155 5ad984c9 Loren Merritt
                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1156
                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1157 cec93959 Loren Merritt
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1158
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1159 115329f1 Diego Biurrun
1160 5ad984c9 Loren Merritt
                /* col_zero_flag */
1161 2ccd25d0 Michael Niedermayer
                if(!IS_INTRA(mb_type_col[0]) && (   l1ref0[x8 + y8*b8_stride] == 0
1162
                                              || (l1ref0[x8 + y8*b8_stride] < 0 && l1ref1[x8 + y8*b8_stride] == 0
1163 bf4e3bd2 Måns Rullgård
                                                  && (h->x264_build>33 || !h->x264_build)))){
1164 2ccd25d0 Michael Niedermayer
                    const int16_t (*l1mv)[2]= l1ref0[x8 + y8*b8_stride] == 0 ? l1mv0 : l1mv1;
1165 f1f17e54 Loren Merritt
                    if(IS_SUB_8X8(sub_mb_type)){
1166 2ccd25d0 Michael Niedermayer
                        const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1167 c26abfa5 Diego Biurrun
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1168 f1f17e54 Loren Merritt
                            if(ref[0] == 0)
1169
                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1170
                            if(ref[1] == 0)
1171
                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1172
                        }
1173
                    }else
1174 5ad984c9 Loren Merritt
                    for(i4=0; i4<4; i4++){
1175 2ccd25d0 Michael Niedermayer
                        const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1176 c26abfa5 Diego Biurrun
                        if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1177 5ad984c9 Loren Merritt
                            if(ref[0] == 0)
1178
                                *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1179
                            if(ref[1] == 0)
1180
                                *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
1181
                        }
1182
                    }
1183
                }
1184
            }
1185
        }
1186
    }else{ /* direct temporal mv pred */
1187 5d18eaad Loren Merritt
        const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1188
        const int *dist_scale_factor = h->dist_scale_factor;
1189 f4d3382d Michael Niedermayer
        int ref_offset= 0;
1190 5d18eaad Loren Merritt
1191 cc615d2c Michael Niedermayer
        if(FRAME_MBAFF && IS_INTERLACED(*mb_type)){
1192 8b1fd554 Michael Niedermayer
            map_col_to_list0[0] = h->map_col_to_list0_field[s->mb_y&1][0];
1193
            map_col_to_list0[1] = h->map_col_to_list0_field[s->mb_y&1][1];
1194
            dist_scale_factor   =h->dist_scale_factor_field[s->mb_y&1];
1195 cc615d2c Michael Niedermayer
        }
1196 48e025e5 Michael Niedermayer
        if(h->ref_list[1][0].mbaff && IS_INTERLACED(mb_type_col[0]))
1197 f4d3382d Michael Niedermayer
            ref_offset += 16;
1198 48e025e5 Michael Niedermayer
1199 cc615d2c Michael Niedermayer
        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])){
1200
            /* FIXME assumes direct_8x8_inference == 1 */
1201 c210fa61 Michael Niedermayer
            int y_shift  = 2*!IS_INTERLACED(*mb_type);
1202 5d18eaad Loren Merritt
1203 cc615d2c Michael Niedermayer
            for(i8=0; i8<4; i8++){
1204
                const int x8 = i8&1;
1205
                const int y8 = i8>>1;
1206
                int ref0, scale;
1207
                const int16_t (*l1mv)[2]= l1mv0;
1208 5d18eaad Loren Merritt
1209 cc615d2c Michael Niedermayer
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1210
                    continue;
1211
                h->sub_mb_type[i8] = sub_mb_type;
1212
1213
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1214
                if(IS_INTRA(mb_type_col[y8])){
1215
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1216
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1217
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1218
                    continue;
1219
                }
1220
1221
                ref0 = l1ref0[x8 + y8*b8_stride];
1222
                if(ref0 >= 0)
1223 f4d3382d Michael Niedermayer
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
1224 cc615d2c Michael Niedermayer
                else{
1225 f4d3382d Michael Niedermayer
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1226 cc615d2c Michael Niedermayer
                    l1mv= l1mv1;
1227
                }
1228
                scale = dist_scale_factor[ref0];
1229
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1230
1231
                {
1232
                    const int16_t *mv_col = l1mv[x8*3 + y8*b4_stride];
1233
                    int my_col = (mv_col[1]<<y_shift)/2;
1234
                    int mx = (scale * mv_col[0] + 128) >> 8;
1235
                    int my = (scale * my_col + 128) >> 8;
1236
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1237
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1238 5d18eaad Loren Merritt
                }
1239
            }
1240 cc615d2c Michael Niedermayer
            return;
1241
        }
1242 5d18eaad Loren Merritt
1243
        /* one-to-one mv scaling */
1244
1245 5ad984c9 Loren Merritt
        if(IS_16X16(*mb_type)){
1246 fda51641 Michael Niedermayer
            int ref, mv0, mv1;
1247
1248 5ad984c9 Loren Merritt
            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1249 d00eac6c Michael Niedermayer
            if(IS_INTRA(mb_type_col[0])){
1250 fda51641 Michael Niedermayer
                ref=mv0=mv1=0;
1251 5ad984c9 Loren Merritt
            }else{
1252 f4d3382d Michael Niedermayer
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
1253
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
1254 5d18eaad Loren Merritt
                const int scale = dist_scale_factor[ref0];
1255 8583bef8 Michael Niedermayer
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1256 5ad984c9 Loren Merritt
                int mv_l0[2];
1257 5d18eaad Loren Merritt
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1258
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1259 fda51641 Michael Niedermayer
                ref= ref0;
1260
                mv0= pack16to32(mv_l0[0],mv_l0[1]);
1261
                mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1262 5ad984c9 Loren Merritt
            }
1263 fda51641 Michael Niedermayer
            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1264
            fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1265
            fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1266 5ad984c9 Loren Merritt
        }else{
1267
            for(i8=0; i8<4; i8++){
1268
                const int x8 = i8&1;
1269
                const int y8 = i8>>1;
1270 5d18eaad Loren Merritt
                int ref0, scale;
1271 bf4e3bd2 Måns Rullgård
                const int16_t (*l1mv)[2]= l1mv0;
1272 8583bef8 Michael Niedermayer
1273 5ad984c9 Loren Merritt
                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1274
                    continue;
1275
                h->sub_mb_type[i8] = sub_mb_type;
1276 5d18eaad Loren Merritt
                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1277 d00eac6c Michael Niedermayer
                if(IS_INTRA(mb_type_col[0])){
1278 5ad984c9 Loren Merritt
                    fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1279
                    fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1280
                    fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1281
                    continue;
1282
                }
1283 115329f1 Diego Biurrun
1284 f4d3382d Michael Niedermayer
                ref0 = l1ref0[x8 + y8*b8_stride] + ref_offset;
1285 2f944356 Loren Merritt
                if(ref0 >= 0)
1286 5d18eaad Loren Merritt
                    ref0 = map_col_to_list0[0][ref0];
1287 8583bef8 Michael Niedermayer
                else{
1288 f4d3382d Michael Niedermayer
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8*b8_stride] + ref_offset];
1289 8583bef8 Michael Niedermayer
                    l1mv= l1mv1;
1290
                }
1291 5d18eaad Loren Merritt
                scale = dist_scale_factor[ref0];
1292 115329f1 Diego Biurrun
1293 5ad984c9 Loren Merritt
                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
1294 f1f17e54 Loren Merritt
                if(IS_SUB_8X8(sub_mb_type)){
1295 2ccd25d0 Michael Niedermayer
                    const int16_t *mv_col = l1mv[x8*3 + y8*3*b4_stride];
1296 5d18eaad Loren Merritt
                    int mx = (scale * mv_col[0] + 128) >> 8;
1297
                    int my = (scale * mv_col[1] + 128) >> 8;
1298 f1f17e54 Loren Merritt
                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1299
                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1300
                }else
1301 5ad984c9 Loren Merritt
                for(i4=0; i4<4; i4++){
1302 2ccd25d0 Michael Niedermayer
                    const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*b4_stride];
1303 5ad984c9 Loren Merritt
                    int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1304 5d18eaad Loren Merritt
                    mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1305
                    mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1306 5ad984c9 Loren Merritt
                    *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1307
                        pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1308
                }
1309
            }
1310
        }
1311
    }
1312
}
1313
1314 0da71265 Michael Niedermayer
/**
 * Writes the per-macroblock caches of the current macroblock back into the
 * picture-wide tables.
 *
 * For every list used by mb_type this copies the 4x4 grid of motion vectors
 * from h->mv_cache into s->current_picture.motion_val and the four 8x8
 * reference indices from h->ref_cache into s->current_picture.ref_index.
 * With CABAC enabled the motion vector differences are stored into
 * h->mvd_table as well (zeroed for skipped macroblocks). If list 0 is not
 * used, its four reference indices are set to LIST_NOT_USED.
 * For 8x8 macroblocks in B slices with CABAC, h->direct_table is updated for
 * sub-macroblocks 1..3; the entry of sub-macroblock 0 is not written here.
 *
 * @param h       decoder context
 * @param mb_type type flags of the current macroblock
 */
static inline void write_back_motion(H264Context *h, int mb_type){
    MpegEncContext * const s = &h->s;
    const int mv_base  = 4*s->mb_x + 4*s->mb_y*h->b_stride;
    const int ref_base = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
    int list, row, i8;

    if(!USES_LIST(mb_type, 0)){
        fill_rectangle(&s->current_picture.ref_index[0][ref_base], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
    }

    for(list=0; list<h->list_count; list++){
        if(USES_LIST(mb_type, list)){
            int8_t *ref_dst;

            /* each 64-bit copy moves two neighbouring vectors, so two copies cover one row of four */
            for(row=0; row<4; row++){
                const int dst_idx   = mv_base + row*h->b_stride;
                const int cache_idx = scan8[0] + 8*row;

                *(uint64_t*)s->current_picture.motion_val[list][dst_idx + 0]= *(uint64_t*)h->mv_cache[list][cache_idx + 0];
                *(uint64_t*)s->current_picture.motion_val[list][dst_idx + 2]= *(uint64_t*)h->mv_cache[list][cache_idx + 2];
            }

            if( h->pps.cabac ) {
                if(IS_SKIP(mb_type)){
                    fill_rectangle(h->mvd_table[list][mv_base], 4, 4, h->b_stride, 0, 4);
                }else{
                    for(row=0; row<4; row++){
                        const int dst_idx   = mv_base + row*h->b_stride;
                        const int cache_idx = scan8[0] + 8*row;

                        *(uint64_t*)h->mvd_table[list][dst_idx + 0]= *(uint64_t*)h->mvd_cache[list][cache_idx + 0];
                        *(uint64_t*)h->mvd_table[list][dst_idx + 2]= *(uint64_t*)h->mvd_cache[list][cache_idx + 2];
                    }
                }
            }

            /* one reference index per 8x8 block, taken from the first 4x4 block of each */
            ref_dst = &s->current_picture.ref_index[list][ref_base];
            for(i8=0; i8<4; i8++){
                ref_dst[(i8&1) + (i8>>1)*h->b8_stride]= h->ref_cache[list][scan8[4*i8]];
            }
        }
    }

    if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac && IS_8X8(mb_type)){
        uint8_t *direct_dst = &h->direct_table[ref_base];

        for(i8=1; i8<4; i8++){
            direct_dst[(i8&1) + (i8>>1)*h->b8_stride] = IS_DIRECT(h->sub_mb_type[i8]) ? 1 : 0;
        }
    }
}
1360
1361
/**
1362
 * Decodes a network abstraction layer unit.
1363
 * @param consumed is the number of bytes used as input
1364
 * @param length is the length of the array
1365 3b66c4c5 Kevin Baragona
 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1366 115329f1 Diego Biurrun
 * @returns decoded bytes, might be src+1 if no escapes
1367 0da71265 Michael Niedermayer
 */
1368 30317501 Michael Niedermayer
static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1369 0da71265 Michael Niedermayer
    int i, si, di;
1370
    uint8_t *dst;
1371 24456882 Andreas Öman
    int bufidx;
1372 0da71265 Michael Niedermayer
1373 bb270c08 Diego Biurrun
//    src[0]&0x80;                //forbidden bit
1374 0da71265 Michael Niedermayer
    h->nal_ref_idc= src[0]>>5;
1375
    h->nal_unit_type= src[0]&0x1F;
1376
1377
    src++; length--;
1378 115329f1 Diego Biurrun
#if 0
1379 0da71265 Michael Niedermayer
    for(i=0; i<length; i++)
1380
        printf("%2X ", src[i]);
1381
#endif
1382 e08715d3 Michael Niedermayer
1383
#ifdef HAVE_FAST_UNALIGNED
1384
# ifdef HAVE_FAST_64BIT
1385
#   define RS 7
1386
    for(i=0; i+1<length; i+=9){
1387
        if(!((~*(uint64_t*)(src+i) & (*(uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
1388
# else
1389
#   define RS 3
1390
    for(i=0; i+1<length; i+=5){
1391
        if(!((~*(uint32_t*)(src+i) & (*(uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
1392
# endif
1393
            continue;
1394
        if(i>0 && !src[i]) i--;
1395
        while(src[i]) i++;
1396
#else
1397
#   define RS 0
1398 0da71265 Michael Niedermayer
    for(i=0; i+1<length; i+=2){
1399
        if(src[i]) continue;
1400
        if(i>0 && src[i-1]==0) i--;
1401 e08715d3 Michael Niedermayer
#endif
1402 0da71265 Michael Niedermayer
        if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1403
            if(src[i+2]!=3){
1404
                /* startcode, so we must be past the end */
1405
                length=i;
1406
            }
1407
            break;
1408
        }
1409 abb27cfb Michael Niedermayer
        i-= RS;
1410 0da71265 Michael Niedermayer
    }
1411
1412
    if(i>=length-1){ //no escaped 0
1413
        *dst_length= length;
1414
        *consumed= length+1; //+1 for the header
1415 115329f1 Diego Biurrun
        return src;
1416 0da71265 Michael Niedermayer
    }
1417
1418 24456882 Andreas Öman
    bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1419 d4369630 Alexander Strange
    h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
1420 24456882 Andreas Öman
    dst= h->rbsp_buffer[bufidx];
1421 0da71265 Michael Niedermayer
1422 ac658be5 Francois Oligny-Lemieux
    if (dst == NULL){
1423
        return NULL;
1424
    }
1425
1426 3b66c4c5 Kevin Baragona
//printf("decoding esc\n");
1427 593af7cd Michael Niedermayer
    memcpy(dst, src, i);
1428
    si=di=i;
1429
    while(si+2<length){
1430 0da71265 Michael Niedermayer
        //remove escapes (very rare 1:2^22)
1431 593af7cd Michael Niedermayer
        if(src[si+2]>3){
1432
            dst[di++]= src[si++];
1433
            dst[di++]= src[si++];
1434
        }else if(src[si]==0 && src[si+1]==0){
1435 0da71265 Michael Niedermayer
            if(src[si+2]==3){ //escape
1436
                dst[di++]= 0;
1437
                dst[di++]= 0;
1438
                si+=3;
1439 c8470cc1 Michael Niedermayer
                continue;
1440 0da71265 Michael Niedermayer
            }else //next start code
1441 593af7cd Michael Niedermayer
                goto nsc;
1442 0da71265 Michael Niedermayer
        }
1443
1444
        dst[di++]= src[si++];
1445
    }
1446 593af7cd Michael Niedermayer
    while(si<length)
1447
        dst[di++]= src[si++];
1448
nsc:
1449 0da71265 Michael Niedermayer
1450 d4369630 Alexander Strange
    memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
1451
1452 0da71265 Michael Niedermayer
    *dst_length= di;
1453
    *consumed= si + 1;//+1 for the header
1454 90b5b51e Diego Biurrun
//FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1455 0da71265 Michael Niedermayer
    return dst;
1456
}
1457
1458
/**
1459
 * identifies the exact end of the bitstream
1460
 * @return the length of the trailing, or 0 if damaged
1461
 */
1462 30317501 Michael Niedermayer
static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1463 0da71265 Michael Niedermayer
    int v= *src;
1464
    int r;
1465
1466 a9c9a240 Michel Bardiaux
    tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1467 0da71265 Michael Niedermayer
1468
    for(r=1; r<9; r++){
1469
        if(v&1) return r;
1470
        v>>=1;
1471
    }
1472
    return 0;
1473
}
1474
1475
/**
1476 1412060e Diego Biurrun
 * IDCT transforms the 16 dc values and dequantizes them.
1477 0da71265 Michael Niedermayer
 * @param qp quantization parameter
1478
 */
1479 239ea04c Loren Merritt
static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1480 0da71265 Michael Niedermayer
#define stride 16
1481
    int i;
1482
    int temp[16]; //FIXME check if this is a good idea
1483
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
1484
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1485
1486
//memset(block, 64, 2*256);
1487
//return;
1488
    for(i=0; i<4; i++){
1489
        const int offset= y_offset[i];
1490
        const int z0= block[offset+stride*0] + block[offset+stride*4];
1491
        const int z1= block[offset+stride*0] - block[offset+stride*4];
1492
        const int z2= block[offset+stride*1] - block[offset+stride*5];
1493
        const int z3= block[offset+stride*1] + block[offset+stride*5];
1494
1495
        temp[4*i+0]= z0+z3;
1496
        temp[4*i+1]= z1+z2;
1497
        temp[4*i+2]= z1-z2;
1498
        temp[4*i+3]= z0-z3;
1499
    }
1500
1501
    for(i=0; i<4; i++){
1502
        const int offset= x_offset[i];
1503
        const int z0= temp[4*0+i] + temp[4*2+i];
1504
        const int z1= temp[4*0+i] - temp[4*2+i];
1505
        const int z2= temp[4*1+i] - temp[4*3+i];
1506
        const int z3= temp[4*1+i] + temp[4*3+i];
1507
1508 1412060e Diego Biurrun
        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
1509 239ea04c Loren Merritt
        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1510
        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1511
        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
1512 0da71265 Michael Niedermayer
    }
1513
}
1514
1515 e5017ab8 Laurent Aimar
#if 0
/**
 * DCT transforms the 16 dc values.
 *
 * Disabled code. Works in place on the 16 coefficients at block[16*k],
 * k = 0..15, using two butterfly passes; every result of the second pass is
 * halved with an arithmetic shift. Uses the `stride` macro, which this
 * function does not define itself.
 *
 * @param qp quantization parameter ??? FIXME
 */
static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
//    const int qmul= dequant_coeff[qp][0];
    static const int x_offset[4]={0, 1*stride, 4* stride,  5*stride};
    static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
    int temp[16]; //FIXME check if this is a good idea
    int i;

    /* first pass: block -> temp */
    for(i=0; i<4; i++){
        const DCTELEM *in= block + y_offset[i];
        int *out= temp + 4*i;
        const int sum04= in[stride*0] + in[stride*4];
        const int dif04= in[stride*0] - in[stride*4];
        const int dif15= in[stride*1] - in[stride*5];
        const int sum15= in[stride*1] + in[stride*5];

        out[0]= sum04+sum15;
        out[1]= dif04+dif15;
        out[2]= dif04-dif15;
        out[3]= sum04-sum15;
    }

    /* second pass: temp -> block, halving each result */
    for(i=0; i<4; i++){
        DCTELEM *out= block + x_offset[i];
        const int sum02= temp[4*0+i] + temp[4*2+i];
        const int dif02= temp[4*0+i] - temp[4*2+i];
        const int dif13= temp[4*1+i] - temp[4*3+i];
        const int sum13= temp[4*1+i] + temp[4*3+i];

        out[stride*0 ]= (sum02 + sum13)>>1;
        out[stride*2 ]= (dif02 + dif13)>>1;
        out[stride*8 ]= (dif02 - dif13)>>1;
        out[stride*10]= (sum02 - sum13)>>1;
    }
}
#endif
1554
1555 0da71265 Michael Niedermayer
#undef xStride
1556
#undef stride
1557
1558 239ea04c Loren Merritt
/**
 * Inverse transforms the 2x2 chroma dc values and dequantizes them.
 *
 * Works in place on the four coefficients at block[0], block[16], block[32]
 * and block[48]: a 2x2 butterfly is applied and every result is multiplied
 * by qmul and shifted right by 7.
 *
 * @param block coefficient array, modified in place
 * @param qp    quantization parameter (not referenced by this implementation)
 * @param qmul  dequantization multiplier
 */
static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
    DCTELEM * const dc00= block;
    DCTELEM * const dc01= block + 16;
    DCTELEM * const dc10= block + 16*2;
    DCTELEM * const dc11= block + 16*2 + 16;

    const int top_sum = *dc00 + *dc01;
    const int top_dif = *dc00 - *dc01;
    const int bot_sum = *dc10 + *dc11;
    const int bot_dif = *dc10 - *dc11;

    *dc00= ((top_sum + bot_sum)*qmul) >> 7;
    *dc01= ((top_dif + bot_dif)*qmul) >> 7;
    *dc10= ((top_sum - bot_sum)*qmul) >> 7;
    *dc11= ((top_dif - bot_dif)*qmul) >> 7;
}
1578
1579 e5017ab8 Laurent Aimar
#if 0
/**
 * Transforms the 2x2 chroma dc values.
 *
 * Disabled code. Applies a 2x2 butterfly in place to the four coefficients
 * at block[0], block[16], block[32] and block[48]; no scaling is applied.
 *
 * @param block coefficient array, modified in place
 */
static void chroma_dc_dct_c(DCTELEM *block){
    DCTELEM * const dc00= block;
    DCTELEM * const dc01= block + 16;
    DCTELEM * const dc10= block + 16*2;
    DCTELEM * const dc11= block + 16*2 + 16;

    const int top_sum = *dc00 + *dc01;
    const int top_dif = *dc00 - *dc01;
    const int bot_sum = *dc10 + *dc11;
    const int bot_dif = *dc10 - *dc11;

    *dc00= (top_sum + bot_sum);
    *dc01= (top_dif + bot_dif);
    *dc10= (top_sum - bot_sum);
    *dc11= (top_dif - bot_dif);
}
#endif
1601 0da71265 Michael Niedermayer
1602
/**
1603
 * gets the chroma qp.
1604
 */
1605 4691a77d Andreas Öman
static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1606 5a78bfbd Michael Niedermayer
    return h->pps.chroma_qp_table[t][qscale];
1607 0da71265 Michael Niedermayer
}
1608
1609
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1610
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1611
                           int src_x_offset, int src_y_offset,
1612
                           qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1613
    MpegEncContext * const s = &h->s;
1614
    const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1615 5d18eaad Loren Merritt
    int my=       h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1616 0da71265 Michael Niedermayer
    const int luma_xy= (mx&3) + ((my&3)<<2);
1617 5d18eaad Loren Merritt
    uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1618
    uint8_t * src_cb, * src_cr;
1619
    int extra_width= h->emu_edge_width;
1620
    int extra_height= h->emu_edge_height;
1621 0da71265 Michael Niedermayer
    int emu=0;
1622
    const int full_mx= mx>>2;
1623
    const int full_my= my>>2;
1624 fbd312fd Loren Merritt
    const int pic_width  = 16*s->mb_width;
1625 0d43dd8c Jeff Downs
    const int pic_height = 16*s->mb_height >> MB_FIELD;
1626 115329f1 Diego Biurrun
1627 0da71265 Michael Niedermayer
    if(mx&7) extra_width -= 3;
1628
    if(my&7) extra_height -= 3;
1629 115329f1 Diego Biurrun
1630
    if(   full_mx < 0-extra_width
1631
       || full_my < 0-extra_height
1632
       || full_mx + 16/*FIXME*/ > pic_width + extra_width
1633 fbd312fd Loren Merritt
       || full_my + 16/*FIXME*/ > pic_height + extra_height){
1634 5d18eaad Loren Merritt
        ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1635
            src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1636 0da71265 Michael Niedermayer
        emu=1;
1637
    }
1638 115329f1 Diego Biurrun
1639 5d18eaad Loren Merritt
    qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1640 0da71265 Michael Niedermayer
    if(!square){
1641 5d18eaad Loren Merritt
        qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1642 0da71265 Michael Niedermayer
    }
1643 115329f1 Diego Biurrun
1644 87352549 Michael Niedermayer
    if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1645 115329f1 Diego Biurrun
1646 0d43dd8c Jeff Downs
    if(MB_FIELD){
1647 5d18eaad Loren Merritt
        // chroma offset when predicting from a field of opposite parity
1648 2143b118 Jeff Downs
        my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1649 5d18eaad Loren Merritt
        emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1650
    }
1651
    src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1652
    src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1653
1654 0da71265 Michael Niedermayer
    if(emu){
1655 5d18eaad Loren Merritt
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1656 0da71265 Michael Niedermayer
            src_cb= s->edge_emu_buffer;
1657
    }
1658 5d18eaad Loren Merritt
    chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1659 0da71265 Michael Niedermayer
1660
    if(emu){
1661 5d18eaad Loren Merritt
        ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1662 0da71265 Michael Niedermayer
            src_cr= s->edge_emu_buffer;
1663
    }
1664 5d18eaad Loren Merritt
    chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1665 0da71265 Michael Niedermayer
}
1666
1667 9f2d1b4f Loren Merritt
static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1668 0da71265 Michael Niedermayer
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1669
                           int x_offset, int y_offset,
1670
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1671
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1672
                           int list0, int list1){
1673
    MpegEncContext * const s = &h->s;
1674
    qpel_mc_func *qpix_op=  qpix_put;
1675
    h264_chroma_mc_func chroma_op= chroma_put;
1676 115329f1 Diego Biurrun
1677 5d18eaad Loren Merritt
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1678
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1679
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1680 0da71265 Michael Niedermayer
    x_offset += 8*s->mb_x;
1681 0d43dd8c Jeff Downs
    y_offset += 8*(s->mb_y >> MB_FIELD);
1682 115329f1 Diego Biurrun
1683 0da71265 Michael Niedermayer
    if(list0){
1684 1924f3ce Michael Niedermayer
        Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1685 0da71265 Michael Niedermayer
        mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1686
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1687
                           qpix_op, chroma_op);
1688
1689
        qpix_op=  qpix_avg;
1690
        chroma_op= chroma_avg;
1691
    }
1692
1693
    if(list1){
1694 1924f3ce Michael Niedermayer
        Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1695 0da71265 Michael Niedermayer
        mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1696
                           dest_y, dest_cb, dest_cr, x_offset, y_offset,
1697
                           qpix_op, chroma_op);
1698
    }
1699
}
1700
1701 9f2d1b4f Loren Merritt
static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1702
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1703
                           int x_offset, int y_offset,
1704
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1705
                           h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1706
                           h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1707
                           int list0, int list1){
1708
    MpegEncContext * const s = &h->s;
1709
1710 5d18eaad Loren Merritt
    dest_y  += 2*x_offset + 2*y_offset*h->  mb_linesize;
1711
    dest_cb +=   x_offset +   y_offset*h->mb_uvlinesize;
1712
    dest_cr +=   x_offset +   y_offset*h->mb_uvlinesize;
1713 9f2d1b4f Loren Merritt
    x_offset += 8*s->mb_x;
1714 0d43dd8c Jeff Downs
    y_offset += 8*(s->mb_y >> MB_FIELD);
1715 115329f1 Diego Biurrun
1716 9f2d1b4f Loren Merritt
    if(list0 && list1){
1717
        /* don't optimize for luma-only case, since B-frames usually
1718
         * use implicit weights => chroma too. */
1719
        uint8_t *tmp_cb = s->obmc_scratchpad;
1720 5d18eaad Loren Merritt
        uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1721
        uint8_t *tmp_y  = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1722 9f2d1b4f Loren Merritt
        int refn0 = h->ref_cache[0][ scan8[n] ];
1723
        int refn1 = h->ref_cache[1][ scan8[n] ];
1724
1725
        mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1726
                    dest_y, dest_cb, dest_cr,
1727
                    x_offset, y_offset, qpix_put, chroma_put);
1728
        mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1729
                    tmp_y, tmp_cb, tmp_cr,
1730
                    x_offset, y_offset, qpix_put, chroma_put);
1731
1732
        if(h->use_weight == 2){
1733
            int weight0 = h->implicit_weight[refn0][refn1];
1734
            int weight1 = 64 - weight0;
1735 5d18eaad Loren Merritt
            luma_weight_avg(  dest_y,  tmp_y,  h->  mb_linesize, 5, weight0, weight1, 0);
1736
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1737
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1738 9f2d1b4f Loren Merritt
        }else{
1739 5d18eaad Loren Merritt
            luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1740 115329f1 Diego Biurrun
                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1741 e8b56208 Loren Merritt
                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1742 5d18eaad Loren Merritt
            chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1743 115329f1 Diego Biurrun
                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1744 e8b56208 Loren Merritt
                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1745 5d18eaad Loren Merritt
            chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1746 115329f1 Diego Biurrun
                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1747 e8b56208 Loren Merritt
                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1748 9f2d1b4f Loren Merritt
        }
1749
    }else{
1750
        int list = list1 ? 1 : 0;
1751
        int refn = h->ref_cache[list][ scan8[n] ];
1752
        Picture *ref= &h->ref_list[list][refn];
1753
        mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1754
                    dest_y, dest_cb, dest_cr, x_offset, y_offset,
1755
                    qpix_put, chroma_put);
1756
1757 5d18eaad Loren Merritt
        luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1758 9f2d1b4f Loren Merritt
                       h->luma_weight[list][refn], h->luma_offset[list][refn]);
1759
        if(h->use_weight_chroma){
1760 5d18eaad Loren Merritt
            chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1761 9f2d1b4f Loren Merritt
                             h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1762 5d18eaad Loren Merritt
            chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1763 9f2d1b4f Loren Merritt
                             h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1764
        }
1765
    }
1766
}
1767
1768
static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1769
                           uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770
                           int x_offset, int y_offset,
1771
                           qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772
                           qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1773 115329f1 Diego Biurrun
                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1774 9f2d1b4f Loren Merritt
                           int list0, int list1){
1775
    if((h->use_weight==2 && list0 && list1
1776
        && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1777
       || h->use_weight==1)
1778
        mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1779
                         x_offset, y_offset, qpix_put, chroma_put,
1780
                         weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1781
    else
1782
        mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1783
                    x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1784
}
1785
1786 513fbd8e Loren Merritt
/**
 * Prefetch reference pixels for the given list.
 *
 * fetch pixels for estimated mv 4 macroblocks ahead
 * optimized for 64byte cache lines
 *
 * Nothing is done when the first block of the macroblock has a negative
 * reference index in this list.
 */
static inline void prefetch_motion(H264Context *h, int list){
    MpegEncContext * const s = &h->s;
    const int refn = h->ref_cache[list][scan8[0]];
    uint8_t **planes;
    int mx, my, off;

    if(refn < 0)
        return;

    /* integer part of the first block's motion vector, plus the macroblock position */
    mx = (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
    my = (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
    planes = h->ref_list[list][refn].data;

    /* luma */
    off = mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
    s->dsp.prefetch(planes[0]+off, s->linesize, 4);

    /* chroma: the distance between the two chroma planes is used as the stride */
    off = (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
    s->dsp.prefetch(planes[1]+off, planes[2]-planes[1], 2);
}
1801
1802 0da71265 Michael Niedermayer
static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1803
                      qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1804 9f2d1b4f Loren Merritt
                      qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1805
                      h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1806 0da71265 Michael Niedermayer
    MpegEncContext * const s = &h->s;
1807 64514ee8 Alexander Strange
    const int mb_xy= h->mb_xy;
1808 0da71265 Michael Niedermayer
    const int mb_type= s->current_picture.mb_type[mb_xy];
1809 115329f1 Diego Biurrun
1810 0da71265 Michael Niedermayer
    assert(IS_INTER(mb_type));
1811 115329f1 Diego Biurrun
1812 513fbd8e Loren Merritt
    prefetch_motion(h, 0);
1813
1814 0da71265 Michael Niedermayer
    if(IS_16X16(mb_type)){
1815
        mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1816
                qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1817 9f2d1b4f Loren Merritt
                &weight_op[0], &weight_avg[0],
1818 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1819
    }else if(IS_16X8(mb_type)){
1820
        mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1821
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1822 9f2d1b4f Loren Merritt
                &weight_op[1], &weight_avg[1],
1823 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1824
        mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1825
                qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1826 9f2d1b4f Loren Merritt
                &weight_op[1], &weight_avg[1],
1827 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1828
    }else if(IS_8X16(mb_type)){
1829 5d18eaad Loren Merritt
        mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1830 0da71265 Michael Niedermayer
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1831 9f2d1b4f Loren Merritt
                &weight_op[2], &weight_avg[2],
1832 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1833 5d18eaad Loren Merritt
        mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1834 0da71265 Michael Niedermayer
                qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1835 9f2d1b4f Loren Merritt
                &weight_op[2], &weight_avg[2],
1836 0da71265 Michael Niedermayer
                IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1837
    }else{
1838
        int i;
1839 115329f1 Diego Biurrun
1840 0da71265 Michael Niedermayer
        assert(IS_8X8(mb_type));
1841
1842
        for(i=0; i<4; i++){
1843
            const int sub_mb_type= h->sub_mb_type[i];
1844
            const int n= 4*i;
1845
            int x_offset= (i&1)<<2;
1846
            int y_offset= (i&2)<<1;
1847
1848
            if(IS_SUB_8X8(sub_mb_type)){
1849
                mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1850
                    qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1851 9f2d1b4f Loren Merritt
                    &weight_op[3], &weight_avg[3],
1852 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1853
            }else if(IS_SUB_8X4(sub_mb_type)){
1854
                mc_part(h, n  , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1855
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1856 9f2d1b4f Loren Merritt
                    &weight_op[4], &weight_avg[4],
1857 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1858
                mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1859
                    qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1860 9f2d1b4f Loren Merritt
                    &weight_op[4], &weight_avg[4],
1861 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1862
            }else if(IS_SUB_4X8(sub_mb_type)){
1863 5d18eaad Loren Merritt
                mc_part(h, n  , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1864 0da71265 Michael Niedermayer
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1865 9f2d1b4f Loren Merritt
                    &weight_op[5], &weight_avg[5],
1866 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1867 5d18eaad Loren Merritt
                mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1868 0da71265 Michael Niedermayer
                    qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1869 9f2d1b4f Loren Merritt
                    &weight_op[5], &weight_avg[5],
1870 0da71265 Michael Niedermayer
                    IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1871
            }else{
1872
                int j;
1873
                assert(IS_SUB_4X4(sub_mb_type));
1874
                for(j=0; j<4; j++){
1875
                    int sub_x_offset= x_offset + 2*(j&1);
1876
                    int sub_y_offset= y_offset +   (j&2);
1877
                    mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1878
                        qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1879 9f2d1b4f Loren Merritt
                        &weight_op[6], &weight_avg[6],
1880 0da71265 Michael Niedermayer
                        IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1881
                }
1882
            }
1883
        }
1884
    }
1885 513fbd8e Loren Merritt
1886
    prefetch_motion(h, 1);
1887 0da71265 Michael Niedermayer
}
1888
1889 8140955d Michael Niedermayer
static av_cold void init_cavlc_level_tab(void){
1890
    int suffix_length, mask;
1891
    unsigned int i;
1892
1893
    for(suffix_length=0; suffix_length<7; suffix_length++){
1894
        for(i=0; i<(1<<LEVEL_TAB_BITS); i++){
1895
            int prefix= LEVEL_TAB_BITS - av_log2(2*i);
1896
            int level_code= (prefix<<suffix_length) + (i>>(LEVEL_TAB_BITS-prefix-1-suffix_length)) - (1<<suffix_length);
1897
1898
            mask= -(level_code&1);
1899
            level_code= (((2+level_code)>>1) ^ mask) - mask;
1900
            if(prefix + 1 + suffix_length <= LEVEL_TAB_BITS){
1901
                cavlc_level_tab[suffix_length][i][0]= level_code;
1902
                cavlc_level_tab[suffix_length][i][1]= prefix + 1 + suffix_length;
1903
            }else if(prefix + 1 <= LEVEL_TAB_BITS){
1904
                cavlc_level_tab[suffix_length][i][0]= prefix+100;
1905
                cavlc_level_tab[suffix_length][i][1]= prefix + 1;
1906
            }else{
1907
                cavlc_level_tab[suffix_length][i][0]= LEVEL_TAB_BITS+100;
1908
                cavlc_level_tab[suffix_length][i][1]= LEVEL_TAB_BITS;
1909
            }
1910
        }
1911
    }
1912
}
1913
1914 98a6fff9 Zuxy Meng
static av_cold void decode_init_vlc(void){
1915 0da71265 Michael Niedermayer
    static int done = 0;
1916
1917
    if (!done) {
1918
        int i;
1919 910e3668 Art Clarke
        int offset;
1920 0da71265 Michael Niedermayer
        done = 1;
1921
1922 910e3668 Art Clarke
        chroma_dc_coeff_token_vlc.table = chroma_dc_coeff_token_vlc_table;
1923
        chroma_dc_coeff_token_vlc.table_allocated = chroma_dc_coeff_token_vlc_table_size;
1924 115329f1 Diego Biurrun
        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1925 0da71265 Michael Niedermayer
                 &chroma_dc_coeff_token_len [0], 1, 1,
1926 910e3668 Art Clarke
                 &chroma_dc_coeff_token_bits[0], 1, 1,
1927
                 INIT_VLC_USE_NEW_STATIC);
1928 0da71265 Michael Niedermayer
1929 910e3668 Art Clarke
        offset = 0;
1930 0da71265 Michael Niedermayer
        for(i=0; i<4; i++){
1931 910e3668 Art Clarke
            coeff_token_vlc[i].table = coeff_token_vlc_tables+offset;
1932
            coeff_token_vlc[i].table_allocated = coeff_token_vlc_tables_size[i];
1933 115329f1 Diego Biurrun
            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1934 0da71265 Michael Niedermayer
                     &coeff_token_len [i][0], 1, 1,
1935 910e3668 Art Clarke
                     &coeff_token_bits[i][0], 1, 1,
1936
                     INIT_VLC_USE_NEW_STATIC);
1937
            offset += coeff_token_vlc_tables_size[i];
1938 0da71265 Michael Niedermayer
        }
1939 910e3668 Art Clarke
        /*
1940
         * This is a one time safety check to make sure that
1941
         * the packed static coeff_token_vlc table sizes
1942
         * were initialized correctly.
1943
         */
1944 37d3e066 Aurelien Jacobs
        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
1945 0da71265 Michael Niedermayer
1946
        for(i=0; i<3; i++){
1947 910e3668 Art Clarke
            chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
1948
            chroma_dc_total_zeros_vlc[i].table_allocated = chroma_dc_total_zeros_vlc_tables_size;
1949
            init_vlc(&chroma_dc_total_zeros_vlc[i],
1950
                     CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1951 0da71265 Michael Niedermayer
                     &chroma_dc_total_zeros_len [i][0], 1, 1,
1952 910e3668 Art Clarke
                     &chroma_dc_total_zeros_bits[i][0], 1, 1,
1953
                     INIT_VLC_USE_NEW_STATIC);
1954 0da71265 Michael Niedermayer
        }
1955
        for(i=0; i<15; i++){
1956 910e3668 Art Clarke
            total_zeros_vlc[i].table = total_zeros_vlc_tables[i];
1957
            total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size;
1958
            init_vlc(&total_zeros_vlc[i],
1959
                     TOTAL_ZEROS_VLC_BITS, 16,
1960 0da71265 Michael Niedermayer
                     &total_zeros_len [i][0], 1, 1,
1961 910e3668 Art Clarke
                     &total_zeros_bits[i][0], 1, 1,
1962
                     INIT_VLC_USE_NEW_STATIC);
1963 0da71265 Michael Niedermayer
        }
1964
1965
        for(i=0; i<6; i++){
1966 910e3668 Art Clarke
            run_vlc[i].table = run_vlc_tables[i];
1967
            run_vlc[i].table_allocated = run_vlc_tables_size;
1968
            init_vlc(&run_vlc[i],
1969
                     RUN_VLC_BITS, 7,
1970 0da71265 Michael Niedermayer
                     &run_len [i][0], 1, 1,
1971 910e3668 Art Clarke
                     &run_bits[i][0], 1, 1,
1972
                     INIT_VLC_USE_NEW_STATIC);
1973 0da71265 Michael Niedermayer
        }
1974 910e3668 Art Clarke
        run7_vlc.table = run7_vlc_table,
1975
        run7_vlc.table_allocated = run7_vlc_table_size;
1976 115329f1 Diego Biurrun
        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1977 0da71265 Michael Niedermayer
                 &run_len [6][0], 1, 1,
1978 910e3668 Art Clarke
                 &run_bits[6][0], 1, 1,
1979
                 INIT_VLC_USE_NEW_STATIC);
1980 8140955d Michael Niedermayer
1981
        init_cavlc_level_tab();
1982 0da71265 Michael Niedermayer
    }
1983
}
1984
1985
/**
 * Free the tables allocated by alloc_tables() and the per-thread buffers
 * (top borders and scratchpad) of every existing thread context.
 *
 * All freed pointers are reset to NULL by av_freep(); h->slice_table, which
 * points into slice_table_base, is cleared explicitly.
 */
static void free_tables(H264Context *h){
    int i;

    /* tables shared between all thread contexts */
    av_freep(&h->intra4x4_pred_mode);
    av_freep(&h->chroma_pred_mode_table);
    av_freep(&h->cbp_table);
    av_freep(&h->mvd_table[0]);
    av_freep(&h->mvd_table[1]);
    av_freep(&h->direct_table);
    av_freep(&h->non_zero_count);
    av_freep(&h->slice_table_base);
    h->slice_table= NULL;

    av_freep(&h->mb2b_xy);
    av_freep(&h->mb2b8_xy);

    /* buffers owned by the individual thread contexts */
    for(i = 0; i < h->s.avctx->thread_count; i++) {
        H264Context * const hx = h->thread_context[i];

        if(hx){
            av_freep(&hx->top_borders[1]);
            av_freep(&hx->top_borders[0]);
            av_freep(&hx->s.obmc_scratchpad);
        }
    }
}
2009
2010 239ea04c Loren Merritt
/**
 * Build the 8x8 dequantization tables from the PPS scaling matrices.
 *
 * One table with 52 rows of 64 coefficients is built per scaling matrix.
 * If both 8x8 scaling matrices are identical, the second table pointer simply
 * aliases the first buffer. The coefficients are stored transposed when the
 * 8x8 IDCT in use is not the C reference implementation.
 */
static void init_dequant8_coeff_table(H264Context *h){
    int list, qp, pos;
    const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly

    h->dequant8_coeff[0] = h->dequant8_buffer[0];
    h->dequant8_coeff[1] = h->dequant8_buffer[1];

    for(list=0; list<2; list++){
        if(list == 1 &&
           memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t)) == 0){
            /* same matrix as the first one: share its table */
            h->dequant8_coeff[1] = h->dequant8_buffer[0];
            break;
        }

        for(qp=0; qp<52; qp++){
            const int shift = div6[qp];
            const int idx   = rem6[qp];

            for(pos=0; pos<64; pos++){
                const int dst  = transpose ? (pos>>3)|((pos&7)<<3) : pos;
                const int scan = dequant8_coeff_init_scan[((pos>>1)&12) | (pos&3)];

                h->dequant8_coeff[list][qp][dst] =
                    ((uint32_t)dequant8_coeff_init[idx][scan] *
                    h->pps.scaling_matrix8[list][pos]) << shift;
            }
        }
    }
}
2032
2033
/**
 * Build the 4x4 dequantization tables from the six PPS scaling matrices.
 *
 * A matrix that is identical to an earlier one reuses that earlier table
 * buffer instead of computing its own. The coefficients are stored transposed
 * when the 4x4 IDCT in use is not the C reference implementation.
 */
static void init_dequant4_coeff_table(H264Context *h){
    int list, prev, qp, pos;
    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly

    for(list=0; list<6; list++){
        int shared = 0;

        h->dequant4_coeff[list] = h->dequant4_buffer[list];

        /* look for the first earlier matrix with identical contents */
        for(prev=0; prev<list && !shared; prev++){
            if(memcmp(h->pps.scaling_matrix4[prev], h->pps.scaling_matrix4[list], 16*sizeof(uint8_t)) == 0){
                h->dequant4_coeff[list] = h->dequant4_buffer[prev];
                shared = 1;
            }
        }
        if(shared)
            continue;

        for(qp=0; qp<52; qp++){
            const int shift = div6[qp] + 2;
            const int idx   = rem6[qp];

            for(pos=0; pos<16; pos++){
                const int dst = transpose ? (pos>>2)|((pos<<2)&0xF) : pos;

                h->dequant4_coeff[list][qp][dst] =
                    ((uint32_t)dequant4_coeff_init[idx][(pos&1) + ((pos>>2)&1)] *
                    h->pps.scaling_matrix4[list][pos]) << shift;
            }
        }
    }
}
2057
2058
/**
 * (Re)build all dequantization tables.
 *
 * The 4x4 tables are always built; the 8x8 tables only if the PPS enables the
 * 8x8 transform. When the SPS enables transform bypass, row 0 of every table
 * is overwritten with the constant 1<<6.
 */
static void init_dequant_tables(H264Context *h){
    const int have_8x8 = h->pps.transform_8x8_mode;
    int list, pos;

    init_dequant4_coeff_table(h);
    if(have_8x8)
        init_dequant8_coeff_table(h);

    if(!h->sps.transform_bypass)
        return;

    for(list=0; list<6; list++){
        for(pos=0; pos<16; pos++)
            h->dequant4_coeff[list][0][pos] = 1<<6;
    }
    if(have_8x8){
        for(list=0; list<2; list++){
            for(pos=0; pos<64; pos++)
                h->dequant8_coeff[list][0][pos] = 1<<6;
        }
    }
}
2073
2074
2075 0da71265 Michael Niedermayer
/**
2076
 * allocates tables.
2077 3b66c4c5 Kevin Baragona
 * needs width/height
2078 0da71265 Michael Niedermayer
 */
2079
static int alloc_tables(H264Context *h){
2080
    MpegEncContext * const s = &h->s;
2081 7bc9090a Michael Niedermayer
    const int big_mb_num= s->mb_stride * (s->mb_height+1);
2082 239ea04c Loren Merritt
    int x,y;
2083 0da71265 Michael Niedermayer
2084
    CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
2085 e5017ab8 Laurent Aimar
2086 53c05b1e Michael Niedermayer
    CHECKED_ALLOCZ(h->non_zero_count    , big_mb_num * 16 * sizeof(uint8_t))
2087 b735aeea Michael Niedermayer
    CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base))
2088 5d0e4cb8 Michael Niedermayer
    CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2089 0da71265 Michael Niedermayer
2090 7526ade2 Michael Niedermayer
    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2091
    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2092
    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2093
    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2094 e5017ab8 Laurent Aimar
2095 b735aeea Michael Niedermayer
    memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(*h->slice_table_base));
2096 5d18eaad Loren Merritt
    h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2097 0da71265 Michael Niedermayer
2098 a55f20bd Loren Merritt
    CHECKED_ALLOCZ(h->mb2b_xy  , big_mb_num * sizeof(uint32_t));
2099
    CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2100 0da71265 Michael Niedermayer
    for(y=0; y<s->mb_height; y++){
2101
        for(x=0; x<s->mb_width; x++){
2102 7bc9090a Michael Niedermayer
            const int mb_xy= x + y*s->mb_stride;
2103 0da71265 Michael Niedermayer
            const int b_xy = 4*x + 4*y*h->b_stride;
2104
            const int b8_xy= 2*x + 2*y*h->b8_stride;
2105 115329f1 Diego Biurrun
2106 0da71265 Michael Niedermayer
            h->mb2b_xy [mb_xy]= b_xy;
2107
            h->mb2b8_xy[mb_xy]= b8_xy;
2108
        }
2109
    }
2110 9f2d1b4f Loren Merritt
2111 9c6221ae Gert Vervoort
    s->obmc_scratchpad = NULL;
2112
2113 56edbd81 Loren Merritt
    if(!h->dequant4_coeff[0])
2114
        init_dequant_tables(h);
2115
2116 0da71265 Michael Niedermayer
    return 0;
2117
fail:
2118
    free_tables(h);
2119
    return -1;
2120
}
2121
2122 afebe2f7 Andreas Öman
/**
2123
 * Mimic alloc_tables(), but for every context thread.
2124
 */
2125
static void clone_tables(H264Context *dst, H264Context *src){
2126
    dst->intra4x4_pred_mode       = src->intra4x4_pred_mode;
2127
    dst->non_zero_count           = src->non_zero_count;
2128
    dst->slice_table              = src->slice_table;
2129
    dst->cbp_table                = src->cbp_table;
2130
    dst->mb2b_xy                  = src->mb2b_xy;
2131
    dst->mb2b8_xy                 = src->mb2b8_xy;
2132
    dst->chroma_pred_mode_table   = src->chroma_pred_mode_table;
2133
    dst->mvd_table[0]             = src->mvd_table[0];
2134
    dst->mvd_table[1]             = src->mvd_table[1];
2135
    dst->direct_table             = src->direct_table;
2136
2137
    dst->s.obmc_scratchpad = NULL;
2138
    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2139
}
2140
2141
/**
2142
 * Init context
2143
 * Allocate buffers which are not shared amongst multiple threads.
2144
 */
2145
static int context_init(H264Context *h){
2146
    CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2147
    CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2148
2149
    return 0;
2150
fail:
2151
    return -1; // free_tables will clean up for us
2152
}
2153
2154 98a6fff9 Zuxy Meng
/**
 * Initialization of the context: copies the picture dimensions and codec id
 * from the AVCodecContext, sets up the prediction and DSP functions and
 * resets the PPS scaling matrices to the flat value 16.
 *
 * h->s.avctx must already be set.
 */
static av_cold void common_init(H264Context *h){
    MpegEncContext * const s = &h->s;
    AVCodecContext * const avctx = s->avctx;

    s->width    = avctx->width;
    s->height   = avctx->height;
    s->codec_id = avctx->codec->id;

    ff_h264_pred_init(&h->hpc, s->codec_id);

    h->dequant_coeff_pps = -1;
    s->unrestricted_mv   = 1;
    s->decode            = 1; //FIXME

    dsputil_init(&s->dsp, avctx); // needed so that idct permutation is known early

    /* every byte of the scaling matrices is set to 16 */
    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
2172
2173 98a6fff9 Zuxy Meng
/**
 * Decoder init callback.
 *
 * Sets the MpegEncContext defaults, selects the output pixel format from the
 * codec id, builds the static VLC tables and detects AVC-style extradata
 * (non-empty extradata whose first byte is 1).
 *
 * @return always 0
 */
static av_cold int decode_init(AVCodecContext *avctx){
    H264Context * const h = avctx->priv_data;
    MpegEncContext * const s = &h->s;

    MPV_decode_defaults(s);

    s->avctx = avctx;
    common_init(h);

    s->out_format      = FMT_H264;
    s->workaround_bugs = avctx->workaround_bugs;

    // set defaults
    s->quarter_sample = 1;
    s->low_delay      = 1;

    switch(avctx->codec_id){
    case CODEC_ID_SVQ3:
        avctx->pix_fmt = PIX_FMT_YUVJ420P;
        break;
    case CODEC_ID_H264_VDPAU:
        avctx->pix_fmt = PIX_FMT_VDPAU_H264;
        break;
    default:
        avctx->pix_fmt = PIX_FMT_YUV420P;
        break;
    }

    decode_init_vlc();

    h->is_avc = avctx->extradata_size > 0 && avctx->extradata &&
                *(char *)avctx->extradata == 1;
    if(h->is_avc)
        h->got_avcC = 0;

    h->thread_context[0] = h;
    h->outputed_poc = INT_MIN;
    h->prev_poc_msb = 1<<16;
    return 0;
}
2212
2213 af8aa846 Michael Niedermayer
/**
 * Per-frame setup: starts a new picture, computes the block offset tables for
 * the current line sizes and makes sure every thread context has a scratch
 * buffer for bi-prediction weighting.
 *
 * @return 0 on success, -1 if MPV_frame_start() failed or a scratch buffer
 *         could not be allocated. In the latter case the rest of the per-frame
 *         setup is still done, so the picture is left marked as non-reference
 *         as the comment further down describes.
 */
static int frame_start(H264Context *h){
    MpegEncContext * const s = &h->s;
    int i;
    int ret = 0;

    if(MPV_frame_start(s, s->avctx) < 0)
        return -1;
    ff_er_frame_start(s);
    /*
     * MPV_frame_start uses pict_type to derive key_frame.
     * This is incorrect for H.264; IDR markings must be used.
     * Zero here; IDR markings per slice in frame or fields are ORed in later.
     * See decode_nal_units().
     */
    s->current_picture_ptr->key_frame= 0;

    assert(s->linesize && s->uvlinesize);

    /* entries 0..23 use the plain line size, entries 24..47 twice the line size */
    for(i=0; i<16; i++){
        h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
    }
    for(i=0; i<4; i++){
        h->block_offset[16+i]=
        h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
        h->block_offset[24+16+i]=
        h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
    }

    /* can't be in alloc_tables because linesize isn't known there.
     * FIXME: redo bipred weight to not require extra buffer? */
    for(i = 0; i < s->avctx->thread_count; i++){
        H264Context * const hx = h->thread_context[i];

        if(!hx->s.obmc_scratchpad){
            hx->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
            if(!hx->s.obmc_scratchpad){
                /* the scratchpad is used unconditionally by weighted
                 * bi-prediction, so decoding must not go on without it */
                av_log(s->avctx, AV_LOG_ERROR, "Cannot allocate bi-prediction scratch buffer\n");
                ret = -1;
                break;
            }
        }
    }

    /* some macroblocks will be accessed before they're available */
    if(FRAME_MBAFF || s->avctx->thread_count > 1)
        memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));

//    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;

    // We mark the current picture as non-reference after allocating it, so
    // that if we break out due to an error it can be released automatically
    // in the next MPV_frame_start().
    // SVQ3 as well as most other codecs have only last/next/current and thus
    // get released even with set reference, besides SVQ3 and others do not
    // mark frames as reference later "naturally".
    if(s->codec_id != CODEC_ID_SVQ3)
        s->current_picture_ptr->reference= 0;

    s->current_picture_ptr->field_poc[0]=
    s->current_picture_ptr->field_poc[1]= INT_MAX;
    assert(s->current_picture_ptr->long_ref==0);

    return ret;
}
2268
2269 93cc10fa Andreas Öman
/**
 * Copies the right-most pixel column and the bottom pixel row of the
 * macroblock at (s->mb_x, s->mb_y) into h->left_border and
 * h->top_borders[top_idx][s->mb_x].
 *
 * Layout written by this function:
 *  - h->left_border[offset + i*step]            : luma column 15
 *  - h->left_border[uvoffset + 34 + i*step]     : Cb column 7
 *  - h->left_border[uvoffset + 34 + 18 + i*step]: Cr column 7
 *  - h->top_borders[..][mb_x] bytes 0..15 luma, 16..23 Cb, 24..31 Cr
 *
 * NOTE(review): presumably the saved samples are the pre-deblocking values
 * that xchg_mb_border() later swaps back in for intra prediction -- confirm
 * against the callers, which are not all visible here.
 *
 * @param h          decoder context; provides the border buffers and &h->s
 * @param src_y      luma samples of the macroblock (top-left sample)
 * @param src_cb     Cb samples of the macroblock (top-left sample)
 * @param src_cr     Cr samples of the macroblock (top-left sample)
 * @param linesize   distance in bytes between two luma rows
 * @param uvlinesize distance in bytes between two chroma rows
 * @param simple     when non-zero the FRAME_MBAFF handling is skipped and
 *                   chroma is always saved regardless of CODEC_FLAG_GRAY
 */
static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2270 53c05b1e Michael Niedermayer
    MpegEncContext * const s = &h->s;
2271
    int i;
2272 5f7f9719 Michael Niedermayer
    int step    = 1;
2273
    int offset  = 1;
2274
    int uvoffset= 1;
2275
    int top_idx = 1;
2276
    int skiplast= 0;
2277 115329f1 Diego Biurrun
2278 53c05b1e Michael Niedermayer
    // Step back one row so that index i*linesize with i=1..16 (1..8 for
    // chroma) addresses rows 0..15 (0..7) of the macroblock.
    src_y  -=   linesize;
2279
    src_cb -= uvlinesize;
2280
    src_cr -= uvlinesize;
2281
2282 5f7f9719 Michael Niedermayer
    // MBAFF frames: pick where in left_border/top_borders this macroblock
    // is stored, depending on its row parity and on MB_MBAFF.
    if(!simple && FRAME_MBAFF){
2283
        if(s->mb_y&1){
2284
            offset  = MB_MBAFF ? 1 : 17;
2285
            uvoffset= MB_MBAFF ? 1 : 9;
2286
            if(!MB_MBAFF){
2287
                // Additionally keep the second-to-last row (row 14 of luma,
                // row 6 of chroma, given the decrement above) in top_borders[0].
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 0)= *(uint64_t*)(src_y +  15*linesize);
2288
                *(uint64_t*)(h->top_borders[0][s->mb_x]+ 8)= *(uint64_t*)(src_y +8+15*linesize);
2289
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2290
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+7*uvlinesize);
2291
                    *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+7*uvlinesize);
2292
                }
2293
            }
2294
        }else{
2295
            if(!MB_MBAFF){
2296
                // Seed the extra slot 0 of each left_border section from the
                // last byte of the matching top_borders[0] section, and make
                // the copy loops below stop one row early (skiplast).
                h->left_border[0]= h->top_borders[0][s->mb_x][15];
2297
                if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2298
                    h->left_border[34   ]= h->top_borders[0][s->mb_x][16+7  ];
2299
                    h->left_border[34+18]= h->top_borders[0][s->mb_x][16+8+7];
2300
                }
2301
                skiplast= 1;
2302
            }
2303
            offset  =
2304
            uvoffset=
2305
            top_idx = MB_MBAFF ? 0 : 1;
2306
        }
2307
        // With MB_MBAFF set, rows are written to every second left_border entry.
        step= MB_MBAFF ? 2 : 1;
2308
    }
2309
2310 3b66c4c5 Kevin Baragona
    // There are two lines saved, the line above the top macroblock of a pair,
2311 6867a90b Loic Le Loarer
    // and the line above the bottom macroblock
2312 5f7f9719 Michael Niedermayer
    // First entry is the corner sample: last luma byte of the previously
    // saved top border (read before that border is overwritten below).
    h->left_border[offset]= h->top_borders[top_idx][s->mb_x][15];
2313
    for(i=1; i<17 - skiplast; i++){
2314
        h->left_border[offset+i*step]= src_y[15+i*  linesize];
2315 53c05b1e Michael Niedermayer
    }
2316 115329f1 Diego Biurrun
2317 5f7f9719 Michael Niedermayer
    // Bottom luma row (16 bytes) copied as two 64-bit words.
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
2318
    *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2319 53c05b1e Michael Niedermayer
2320 87352549 Michael Niedermayer
    // Same procedure for both chroma planes, skipped only in the non-simple
    // path when ENABLE_GRAY and CODEC_FLAG_GRAY are both set.
    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2321 5f7f9719 Michael Niedermayer
        h->left_border[uvoffset+34   ]= h->top_borders[top_idx][s->mb_x][16+7];
2322
        h->left_border[uvoffset+34+18]= h->top_borders[top_idx][s->mb_x][24+7];
2323
        for(i=1; i<9 - skiplast; i++){
2324
            h->left_border[uvoffset+34   +i*step]= src_cb[7+i*uvlinesize];
2325
            h->left_border[uvoffset+34+18+i*step]= src_cr[7+i*uvlinesize];
2326 53c05b1e Michael Niedermayer
        }
2327 5f7f9719 Michael Niedermayer
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2328
        *(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
2329 53c05b1e Michael Niedermayer
    }
2330
}
2331
2332 93cc10fa Andreas Öman
/**
 * Exchanges the samples stored in h->left_border / h->top_borders with the
 * picture samples directly left of and above the current macroblock.
 *
 * In the visible part of hl_decode_mb_internal() this is called with xchg=1
 * for intra macroblocks when h->deblocking_filter is non-zero, right before
 * the intra prediction functions run.
 *
 * The left_border / top_borders indexing (offset, uvoffset, step, top_idx)
 * mirrors the one used in backup_mb_border().
 *
 * @param h          decoder context; provides border buffers, slice_table,
 *                   mb_xy, top_mb_xy and deblocking_filter
 * @param src_y      luma samples of the macroblock (top-left sample)
 * @param src_cb     Cb samples of the macroblock (top-left sample)
 * @param src_cr     Cr samples of the macroblock (top-left sample)
 * @param linesize   distance in bytes between two luma rows
 * @param uvlinesize distance in bytes between two chroma rows
 * @param xchg       non-zero: swap buffer and picture; zero: only write the
 *                   buffer contents into the picture (see XCHG below). Some
 *                   positions are always swapped regardless of this flag.
 * @param simple     when non-zero the FRAME_MBAFF handling is skipped and
 *                   chroma is always processed regardless of CODEC_FLAG_GRAY
 */
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2333 53c05b1e Michael Niedermayer
    MpegEncContext * const s = &h->s;
2334
    int temp8, i;
2335
    uint64_t temp64;
2336 b69378e2 Andreas Öman
    int deblock_left;
2337
    int deblock_top;
2338
    int mb_xy;
2339 5f7f9719 Michael Niedermayer
    int step    = 1;
2340
    int offset  = 1;
2341
    int uvoffset= 1;
2342
    int top_idx = 1;
2343
2344
    // Same buffer-position selection as in backup_mb_border().
    if(!simple && FRAME_MBAFF){
2345
        if(s->mb_y&1){
2346
            offset  = MB_MBAFF ? 1 : 17;
2347
            uvoffset= MB_MBAFF ? 1 : 9;
2348
        }else{
2349
            offset  =
2350
            uvoffset=
2351
            top_idx = MB_MBAFF ? 0 : 1;
2352
        }
2353
        step= MB_MBAFF ? 2 : 1;
2354
    }
2355 b69378e2 Andreas Öman
2356
    // Decide which neighbours take part: with deblocking_filter == 2 a
    // neighbour counts only if slice_table says it is in the same slice,
    // otherwise it counts whenever it lies inside the picture.
    if(h->deblocking_filter == 2) {
2357 64514ee8 Alexander Strange
        mb_xy = h->mb_xy;
2358 b69378e2 Andreas Öman
        deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2359
        deblock_top  = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2360
    } else {
2361
        deblock_left = (s->mb_x > 0);
2362 6c805007 Michael Niedermayer
        deblock_top =  (s->mb_y > !!MB_FIELD);
2363 b69378e2 Andreas Öman
    }
2364 53c05b1e Michael Niedermayer
2365
    // Move each pointer to the sample diagonally up-left of the macroblock,
    // so index i*linesize walks the column left of it and +1 starts the row
    // above it.
    src_y  -=   linesize + 1;
2366
    src_cb -= uvlinesize + 1;
2367
    src_cr -= uvlinesize + 1;
2368
2369
// XCHG(a,b,t,xchg): b always receives the old value of a; a receives the old
// value of b only when xchg is non-zero. t is scratch storage.
// Expands to several statements, so it must only be used inside braces.
#define XCHG(a,b,t,xchg)\
2370
t= a;\
2371
if(xchg)\
2372
    a= b;\
2373
b= t;
2374 d89dc06a Loren Merritt
2375
    if(deblock_left){
2376 5f7f9719 Michael Niedermayer
        // Row 0 is the top-left corner sample; skip it when the top
        // neighbour is not used.
        for(i = !deblock_top; i<16; i++){
2377
            XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, xchg);
2378 d89dc06a Loren Merritt
        }
2379 5f7f9719 Michael Niedermayer
        // i == 16 here: the last row is always swapped, independent of xchg.
        XCHG(h->left_border[offset+i*step], src_y [i*  linesize], temp8, 1);
2380 d89dc06a Loren Merritt
    }
2381
2382
    if(deblock_top){
2383 5f7f9719 Michael Niedermayer
        // Luma row above: first 8 bytes honour xchg, the next 8 bytes and
        // the first 8 bytes of the next macroblock's border (if there is a
        // next macroblock in this row) are always swapped.
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2384
        XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2385 cad4368a Reimar Döffinger
        if(s->mb_x+1 < s->mb_width){
2386 5f7f9719 Michael Niedermayer
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2387 43efd19a Loren Merritt
        }
2388 53c05b1e Michael Niedermayer
    }
2389
2390 87352549 Michael Niedermayer
    // Chroma planes, skipped only in the non-simple path when ENABLE_GRAY
    // and CODEC_FLAG_GRAY are both set.
    if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2391 d89dc06a Loren Merritt
        if(deblock_left){
2392 5f7f9719 Michael Niedermayer
            for(i = !deblock_top; i<8; i++){
2393
                XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, xchg);
2394
                XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, xchg);
2395 d89dc06a Loren Merritt
            }
2396 5f7f9719 Michael Niedermayer
            // i == 8 here: last chroma row is always swapped.
            XCHG(h->left_border[uvoffset+34   +i*step], src_cb[i*uvlinesize], temp8, 1);
2397
            XCHG(h->left_border[uvoffset+34+18+i*step], src_cr[i*uvlinesize], temp8, 1);
2398 d89dc06a Loren Merritt
        }
2399
        if(deblock_top){
2400 5f7f9719 Michael Niedermayer
            // Chroma rows above are always swapped, independent of xchg.
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2401
            XCHG(*(uint64_t*)(h->top_borders[top_idx][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2402 53c05b1e Michael Niedermayer
        }
2403
    }
2404
}
2405
2406 5a6a6cc7 Diego Biurrun
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2407 0da71265 Michael Niedermayer
    MpegEncContext * const s = &h->s;
2408
    const int mb_x= s->mb_x;
2409
    const int mb_y= s->mb_y;
2410 64514ee8 Alexander Strange
    const int mb_xy= h->mb_xy;
2411 0da71265 Michael Niedermayer
    const int mb_type= s->current_picture.mb_type[mb_xy];
2412
    uint8_t  *dest_y, *dest_cb, *dest_cr;
2413
    int linesize, uvlinesize /*dct_offset*/;
2414
    int i;
2415 6867a90b Loic Le Loarer
    int *block_offset = &h->block_offset[0];
2416 41e4055b Michael Niedermayer
    const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
2417 8b6871ed Diego Biurrun
    /* is_h264 should always be true if SVQ3 is disabled. */
2418
    const int is_h264 = !ENABLE_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
2419 36940eca Loren Merritt
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2420 ef9d1d15 Loren Merritt
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
2421 0da71265 Michael Niedermayer
2422 6120a343 Michael Niedermayer
    dest_y  = s->current_picture.data[0] + (mb_x + mb_y * s->linesize  ) * 16;
2423
    dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2424
    dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2425 0da71265 Michael Niedermayer
2426 a957c27b Loren Merritt
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2427
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
2428
2429 bd91fee3 Alexander Strange
    if (!simple && MB_FIELD) {
2430 5d18eaad Loren Merritt
        linesize   = h->mb_linesize   = s->linesize * 2;
2431
        uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2432 6867a90b Loic Le Loarer
        block_offset = &h->block_offset[24];
2433 1412060e Diego Biurrun
        if(mb_y&1){ //FIXME move out of this function?
2434 0da71265 Michael Niedermayer
            dest_y -= s->linesize*15;
2435 6867a90b Loic Le Loarer
            dest_cb-= s->uvlinesize*7;
2436
            dest_cr-= s->uvlinesize*7;
2437 0da71265 Michael Niedermayer
        }
2438 5d18eaad Loren Merritt
        if(FRAME_MBAFF) {
2439
            int list;
2440 3425501d Michael Niedermayer
            for(list=0; list<h->list_count; list++){
2441 5d18eaad Loren Merritt
                if(!USES_LIST(mb_type, list))
2442
                    continue;
2443
                if(IS_16X16(mb_type)){
2444
                    int8_t *ref = &h->ref_cache[list][scan8[0]];
2445 1710856c Andreas Öman
                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2446 5d18eaad Loren Merritt
                }else{
2447
                    for(i=0; i<16; i+=4){
2448
                        int ref = h->ref_cache[list][scan8[i]];
2449
                        if(ref >= 0)
2450 1710856c Andreas Öman
                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2451 5d18eaad Loren Merritt
                    }
2452
                }
2453
            }
2454
        }
2455 0da71265 Michael Niedermayer
    } else {
2456 5d18eaad Loren Merritt
        linesize   = h->mb_linesize   = s->linesize;
2457
        uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2458 0da71265 Michael Niedermayer
//        dct_offset = s->linesize * 16;
2459
    }
2460 115329f1 Diego Biurrun
2461 bd91fee3 Alexander Strange
    if (!simple && IS_INTRA_PCM(mb_type)) {
2462 c1708e8d Michael Niedermayer
        for (i=0; i<16; i++) {
2463
            memcpy(dest_y + i*  linesize, h->mb       + i*8, 16);
2464 6fbcaaa0 Loic Le Loarer
        }
2465 c1708e8d Michael Niedermayer
        for (i=0; i<8; i++) {
2466
            memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4,  8);
2467
            memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4,  8);
2468 6fbcaaa0 Loic Le Loarer
        }
2469 e7e09b49 Loic Le Loarer
    } else {
2470
        if(IS_INTRA(mb_type)){
2471 5f7f9719 Michael Niedermayer
            if(h->deblocking_filter)
2472 93cc10fa Andreas Öman
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2473 53c05b1e Michael Niedermayer
2474 87352549 Michael Niedermayer
            if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2475 c92a30bb Kostya Shishkov
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2476
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
2477 e7e09b49 Loic Le Loarer
            }
2478 0da71265 Michael Niedermayer
2479 e7e09b49 Loic Le Loarer
            if(IS_INTRA4x4(mb_type)){
2480 bd91fee3 Alexander Strange
                if(simple || !s->encoding){
2481 43efd19a Loren Merritt
                    if(IS_8x8DCT(mb_type)){
2482 1eb96035 Michael Niedermayer
                        if(transform_bypass){
2483
                            idct_dc_add =
2484
                            idct_add    = s->dsp.add_pixels8;
2485 dae006d7 Michael Niedermayer
                        }else{
2486 1eb96035 Michael Niedermayer
                            idct_dc_add = s->dsp.h264_idct8_dc_add;
2487
                            idct_add    = s->dsp.h264_idct8_add;
2488
                        }
2489 43efd19a Loren Merritt
                        for(i=0; i<16; i+=4){
2490
                            uint8_t * const ptr= dest_y + block_offset[i];
2491
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2492 41e4055b Michael Niedermayer
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2493
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
2494
                            }else{
2495 ac0623b2 Michael Niedermayer
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
2496
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2497
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
2498
                                if(nnz){
2499
                                    if(nnz == 1 && h->mb[i*16])
2500
                                        idct_dc_add(ptr, h->mb + i*16, linesize);
2501
                                    else
2502
                                        idct_add   (ptr, h->mb + i*16, linesize);
2503
                                }
2504 41e4055b Michael Niedermayer
                            }
2505 43efd19a Loren Merritt
                        }
2506 1eb96035 Michael Niedermayer
                    }else{
2507
                        if(transform_bypass){
2508
                            idct_dc_add =
2509
                            idct_add    = s->dsp.add_pixels4;
2510
                        }else{
2511
                            idct_dc_add = s->dsp.h264_idct_dc_add;
2512
                            idct_add    = s->dsp.h264_idct_add;
2513
                        }
2514 aebb5d6d Michael Niedermayer
                        for(i=0; i<16; i++){
2515
                            uint8_t * const ptr= dest_y + block_offset[i];
2516
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2517 e7e09b49 Loic Le Loarer
2518 aebb5d6d Michael Niedermayer
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
2519
                                h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
2520
                            }else{
2521
                                uint8_t *topright;
2522
                                int nnz, tr;
2523
                                if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2524
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2525
                                    assert(mb_y || linesize <= block_offset[i]);
2526
                                    if(!topright_avail){
2527
                                        tr= ptr[3 - linesize]*0x01010101;
2528
                                        topright= (uint8_t*) &tr;
2529
                                    }else
2530
                                        topright= ptr + 4 - linesize;
2531 ac0623b2 Michael Niedermayer
                                }else
2532 aebb5d6d Michael Niedermayer
                                    topright= NULL;
2533
2534
                                h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2535
                                nnz = h->non_zero_count_cache[ scan8[i] ];
2536
                                if(nnz){
2537
                                    if(is_h264){
2538
                                        if(nnz == 1 && h->mb[i*16])
2539
                                            idct_dc_add(ptr, h->mb + i*16, linesize);
2540
                                        else
2541
                                            idct_add   (ptr, h->mb + i*16, linesize);
2542
                                    }else
2543
                                        svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
2544
                                }
2545 ac0623b2 Michael Niedermayer
                            }
2546 41e4055b Michael Niedermayer
                        }
2547 8b82a956 Michael Niedermayer
                    }
2548 0da71265 Michael Niedermayer
                }
2549 e7e09b49 Loic Le Loarer
            }else{
2550 c92a30bb Kostya Shishkov
                h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2551 bd91fee3 Alexander Strange
                if(is_h264){
2552 36940eca </