ffmpeg / libavcodec / vp8.c @ b0d58795
History  View  Annotate  Download (59 KB)
1 
/**
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

24  
25 
#include "avcodec.h" 
26 
#include "vp56.h" 
27 
#include "vp8data.h" 
28 
#include "vp8dsp.h" 
29 
#include "h264pred.h" 
30 
#include "rectangle.h" 
31  
32 
typedef struct { 
33 
uint8_t filter_level; 
34 
uint8_t inner_limit; 
35 
uint8_t inner_filter; 
36 
} VP8FilterStrength; 
37  
38 
typedef struct { 
39 
uint8_t skip; 
40 
// todo: make it possible to check for at least (i4x4 or split_mv)

41 
// in one op. are others needed?

42 
uint8_t mode; 
43 
uint8_t ref_frame; 
44 
uint8_t partitioning; 
45 
VP56mv mv; 
46 
VP56mv bmv[16];

47 
} VP8Macroblock; 
48  
49 
typedef struct { 
50 
AVCodecContext *avctx; 
51 
DSPContext dsp; 
52 
VP8DSPContext vp8dsp; 
53 
H264PredContext hpc; 
54 
vp8_mc_func put_pixels_tab[3][3][3]; 
55 
AVFrame frames[4];

56 
AVFrame *framep[4];

57 
uint8_t *edge_emu_buffer; 
58 
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors

59 
int profile;

60  
61 
int mb_width; /* number of horizontal MB */ 
62 
int mb_height; /* number of vertical MB */ 
63 
int linesize;

64 
int uvlinesize;

65  
66 
int keyframe;

67 
int invisible;

68 
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one 
69 
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so 
70 
int update_altref;

71 
int deblock_filter;

72  
73 
/**

74 
* If this flag is not set, all the probability updates

75 
* are discarded after this frame is decoded.

76 
*/

77 
int update_probabilities;

78  
79 
/**

80 
* All coefficients are contained in separate arith coding contexts.

81 
* There can be 1, 2, 4, or 8 of these after the header context.

82 
*/

83 
int num_coeff_partitions;

84 
VP56RangeCoder coeff_partition[8];

85  
86 
VP8Macroblock *macroblocks; 
87 
VP8Macroblock *macroblocks_base; 
88 
VP8FilterStrength *filter_strength; 
89 
int mb_stride;

90  
91 
uint8_t *intra4x4_pred_mode_top; 
92 
uint8_t intra4x4_pred_mode_left[4];

93 
uint8_t *segmentation_map; 
94 
int b4_stride;

95  
96 
/**

97 
* Cache of the top row needed for intra prediction

98 
* 16 for luma, 8 for each chroma plane

99 
*/

100 
uint8_t (*top_border)[16+8+8]; 
101  
102 
/**

103 
* For coeff decode, we need to know whether the above block had nonzero

104 
* coefficients. This means for each macroblock, we need data for 4 luma

105 
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9

106 
* per macroblock. We keep the last row in top_nnz.

107 
*/

108 
uint8_t (*top_nnz)[9];

109 
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; 
110  
111 
/**

112 
* This is the index plus one of the last nonzero coeff

113 
* for each of the blocks in the current macroblock.

114 
* So, 0 > no coeffs

115 
* 1 > dconly (special transform)

116 
* 2+> full transform

117 
*/

118 
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; 
119 
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; 
120 
DECLARE_ALIGNED(16, DCTELEM, block_dc)[16]; 
121 
uint8_t intra4x4_pred_mode_mb[16];

122  
123 
int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock 
124 
int segment; ///< segment of the current macroblock 
125  
126 
int mbskip_enabled;

127 
int sign_bias[4]; ///< one state [0, 1] per ref frame type 
128 
int ref_count[3]; 
129  
130 
/**

131 
* Base parameters for segmentation, i.e. permacroblock parameters.

132 
* These must be kept unchanged even if segmentation is not used for

133 
* a frame, since the values persist between interframes.

134 
*/

135 
struct {

136 
int enabled;

137 
int absolute_vals;

138 
int update_map;

139 
int8_t base_quant[4];

140 
int8_t filter_level[4]; ///< base loop filter level 
141 
} segmentation; 
142  
143 
/**

144 
* Macroblocks can have one of 4 different quants in a frame when

145 
* segmentation is enabled.

146 
* If segmentation is disabled, only the first segment's values are used.

147 
*/

148 
struct {

149 
// [0]  DC qmul [1]  AC qmul

150 
int16_t luma_qmul[2];

151 
int16_t luma_dc_qmul[2]; ///< luma dconly block quant 
152 
int16_t chroma_qmul[2];

153 
} qmat[4];

154  
155 
struct {

156 
int simple;

157 
int level;

158 
int sharpness;

159 
} filter; 
160  
161 
struct {

162 
int enabled; ///< whether each mb can have a different strength based on mode/ref 
163  
164 
/**

165 
* filter strength adjustment for the following macroblock modes:

166 
* [0]  i4x4

167 
* [1]  zero mv

168 
* [2]  inter modes except for zero or split mv

169 
* [3]  split mv

170 
* i16x16 modes never have any adjustment

171 
*/

172 
int8_t mode[4];

173  
174 
/**

175 
* filter strength adjustment for macroblocks that reference:

176 
* [0]  intra / VP56_FRAME_CURRENT

177 
* [1]  VP56_FRAME_PREVIOUS

178 
* [2]  VP56_FRAME_GOLDEN

179 
* [3]  altref / VP56_FRAME_GOLDEN2

180 
*/

181 
int8_t ref[4];

182 
} lf_delta; 
183  
184 
/**

185 
* These are all of the updatable probabilities for binary decisions.

186 
* They are only implictly reset on keyframes, making it quite likely

187 
* for an interframe to desync if a prior frame's header was corrupt

188 
* or missing outright!

189 
*/

190 
struct {

191 
uint8_t segmentid[3];

192 
uint8_t mbskip; 
193 
uint8_t intra; 
194 
uint8_t last; 
195 
uint8_t golden; 
196 
uint8_t pred16x16[4];

197 
uint8_t pred8x8c[3];

198 
/* Padded to allow overreads */

199 
uint8_t token[4][17][3][NUM_DCT_TOKENS1]; 
200 
uint8_t mvc[2][19]; 
201 
} prob[2];

202 
} VP8Context; 
203  
204 
/**
 * Release all reference frames and free the per-dimension buffers.
 * Called on flush and before re-allocating for a new frame size.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0])
            avctx->release_buffer(avctx, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    av_freep(&s->macroblocks_base);
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);
    av_freep(&s->segmentation_map);

    // macroblocks aliases macroblocks_base+1; clear it so it isn't dangling
    s->macroblocks = NULL;
}
224  
225 
/**
 * (Re)allocate all size-dependent decoder buffers for the given dimensions.
 *
 * @return 0 on success, AVERROR_INVALIDDATA for bad dimensions,
 *         AVERROR(ENOMEM) on allocation failure
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    if (avcodec_check_dimensions(s->avctx, width, height))
        return AVERROR_INVALIDDATA;

    vp8_decode_flush(s->avctx);

    avcodec_set_dimensions(s->avctx, width, height);

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    // we allocate a border around the top/left of intra4x4 modes
    // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
    s->mb_stride = s->mb_width+1;
    s->b4_stride = 4*s->mb_stride;

    s->macroblocks_base       = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks));
    s->filter_strength        = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_top = av_mallocz(s->b4_stride*4);
    s->top_nnz                = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border             = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->segmentation_map       = av_mallocz(s->mb_stride*s->mb_height);

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
        !s->top_nnz || !s->top_border || !s->segmentation_map)
        return AVERROR(ENOMEM);

    // +1 so that the MB to the left of the first column is addressable
    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}
257  
258 
/**
 * Parse the segmentation portion of the frame header (spec section 9.3):
 * optional per-segment quant/filter deltas and segment-id tree probabilities.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
278  
279 
/**
 * Read the loop-filter delta adjustments (per reference frame and per
 * macroblock mode) from the frame header.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++)
        s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);

    for (i = 0; i < 4; i++)
        s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
}
290  
291 
/**
 * Initialize the coefficient-partition range decoders.
 * The first (num_partitions-1) sizes are stored as 24-bit LE values before
 * the partition data; the last partition takes whatever bytes remain.
 *
 * @return 0 on success, -1 if the partition sizes overflow the buffer
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
316  
317 
/**
 * Read the quantizer indices from the header and fill the dequant tables
 * for all four segments (spec section 9.6).
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        // spec-mandated clamps on the derived Y2 AC and chroma DC factors
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
348  
349 
/**

350 
* Determine which buffers golden and altref should be updated with after this frame.

351 
* The spec isn't clear here, so I'm going by my understanding of what libvpx does

352 
*

353 
* Intra frames update all 3 references

354 
* Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set

355 
* If the update (goldenaltref) flag is set, it's updated with the current frame

356 
* if update_last is set, and VP56_FRAME_PREVIOUS otherwise.

357 
* If the flag is not set, the number read means:

358 
* 0: no update

359 
* 1: VP56_FRAME_PREVIOUS

360 
* 2: update golden with altref, or update altref with golden

361 
*/

362 
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) 
363 
{ 
364 
VP56RangeCoder *c = &s>c; 
365  
366 
if (update)

367 
return VP56_FRAME_CURRENT;

368  
369 
switch (vp8_rac_get_uint(c, 2)) { 
370 
case 1: 
371 
return VP56_FRAME_PREVIOUS;

372 
case 2: 
373 
return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;

374 
} 
375 
return VP56_FRAME_NONE;

376 
} 
377  
378 
/**
 * Read the golden/altref refresh flags and resolve which frame each
 * reference buffer will be updated from (see ref_to_update()).
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
388  
389 
/**
 * Decode the uncompressed and compressed parts of the VP8 frame header
 * (spec section 9): frame type, dimensions, segmentation, loop filter,
 * partitions, quantizers, reference updates and probability updates.
 *
 * @return 0 on success, negative AVERROR on invalid data
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        // keyframes implicitly reset all probabilities to their defaults
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        // NOTE: original had (ret = update_dimensions(...) < 0) which stored
        // the comparison result in ret instead of the error code; fixed.
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    // 13.4: token probability updates
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
525  
526 
static av_always_inline

527 
void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, int mb_x, int mb_y) 
528 
{ 
529 
#define MARGIN (16 << 2) 
530 
dst>x = av_clip(src>x, ((mb_x << 6) + MARGIN),

531 
((s>mb_width  1  mb_x) << 6) + MARGIN); 
532 
dst>y = av_clip(src>y, ((mb_y << 6) + MARGIN),

533 
((s>mb_height  1  mb_y) << 6) + MARGIN); 
534 
} 
535  
536 
static av_always_inline

537 
void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, 
538 
VP56mv near[2], VP56mv *best, uint8_t cnt[4]) 
539 
{ 
540 
VP8Macroblock *mb_edge[3] = { mb + 2 /* top */, 
541 
mb  1 /* left */, 
542 
mb + 1 /* topleft */ }; 
543 
enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };

544 
VP56mv near_mv[4] = {{ 0 }}; 
545 
enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };

546 
int idx = CNT_ZERO;

547 
int best_idx = CNT_ZERO;

548 
int cur_sign_bias = s>sign_bias[mb>ref_frame];

549 
int *sign_bias = s>sign_bias;

550  
551 
/* Process MB on top, left and topleft */

552 
#define MV_EDGE_CHECK(n)\

553 
{\ 
554 
VP8Macroblock *edge = mb_edge[n];\ 
555 
int edge_ref = edge>ref_frame;\

556 
if (edge_ref != VP56_FRAME_CURRENT) {\

557 
uint32_t mv = AV_RN32A(&edge>mv);\ 
558 
if (mv) {\

559 
if (cur_sign_bias != sign_bias[edge_ref]) {\

560 
/* SWAR negate of the values in mv. */\

561 
mv = ~mv;\ 
562 
mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\ 
563 
}\ 
564 
if (!n  mv != AV_RN32A(&near_mv[idx]))\

565 
AV_WN32A(&near_mv[++idx], mv);\ 
566 
cnt[idx] += 1 + (n != 2);\ 
567 
} else\

568 
cnt[CNT_ZERO] += 1 + (n != 2);\ 
569 
}\ 
570 
} 
571 
MV_EDGE_CHECK(0)

572 
MV_EDGE_CHECK(1)

573 
MV_EDGE_CHECK(2)

574  
575 
/* If we have three distinct MVs, merge first and last if they're the same */

576 
if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT])) 
577 
cnt[CNT_NEAREST] += 1;

578  
579 
cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]>mode == VP8_MVMODE_SPLIT) + 
580 
(mb_edge[EDGE_TOP]>mode == VP8_MVMODE_SPLIT)) * 2 +

581 
(mb_edge[EDGE_TOPLEFT]>mode == VP8_MVMODE_SPLIT); 
582  
583 
/* Swap near and nearest if necessary */

584 
if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {

585 
FFSWAP(uint8_t, cnt[CNT_NEAREST], cnt[CNT_NEAR]); 
586 
FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); 
587 
} 
588  
589 
/* Choose the best mv out of 0,0 and the nearest mv */

590 
if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])

591 
best_idx = CNT_NEAREST; 
592  
593 
mb>mv = near_mv[best_idx]; 
594 
near[0] = near_mv[CNT_NEAREST];

595 
near[1] = near_mv[CNT_NEAR];

596 
} 
597  
598 
/**

599 
* Motion vector coding, 17.1.

600 
*/

601 
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) 
602 
{ 
603 
int bit, x = 0; 
604  
605 
if (vp56_rac_get_prob_branchy(c, p[0])) { 
606 
int i;

607  
608 
for (i = 0; i < 3; i++) 
609 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

610 
for (i = 9; i > 3; i) 
611 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

612 
if (!(x & 0xFFF0)  vp56_rac_get_prob(c, p[12])) 
613 
x += 8;

614 
} else {

615 
// small_mvtree

616 
const uint8_t *ps = p+2; 
617 
bit = vp56_rac_get_prob(c, *ps); 
618 
ps += 1 + 3*bit; 
619 
x += 4*bit;

620 
bit = vp56_rac_get_prob(c, *ps); 
621 
ps += 1 + bit;

622 
x += 2*bit;

623 
x += vp56_rac_get_prob(c, *ps); 
624 
} 
625  
626 
return (x && vp56_rac_get_prob(c, p[1])) ? x : x; 
627 
} 
628  
629 
static av_always_inline

630 
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)

631 
{ 
632 
if (left == top)

633 
return vp8_submv_prob[4!!left]; 
634 
if (!top)

635 
return vp8_submv_prob[2]; 
636 
return vp8_submv_prob[1!!left]; 
637 
} 
638  
639 
/**

640 
* Split motion vector prediction, 16.4.

641 
* @returns the number of motion vectors parsed (2, 4 or 16)

642 
*/

643 
static av_always_inline

644 
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb)

645 
{ 
646 
int part_idx;

647 
int n, num;

648 
VP8Macroblock *top_mb = &mb[2];

649 
VP8Macroblock *left_mb = &mb[1];

650 
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb>partitioning],

651 
*mbsplits_top = vp8_mbsplits[top_mb>partitioning], 
652 
*mbsplits_cur, *firstidx; 
653 
VP56mv *top_mv = top_mb>bmv; 
654 
VP56mv *left_mv = left_mb>bmv; 
655 
VP56mv *cur_mv = mb>bmv; 
656  
657 
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) { 
658 
if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) { 
659 
part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);

660 
} else {

661 
part_idx = VP8_SPLITMVMODE_8x8; 
662 
} 
663 
} else {

664 
part_idx = VP8_SPLITMVMODE_4x4; 
665 
} 
666  
667 
num = vp8_mbsplit_count[part_idx]; 
668 
mbsplits_cur = vp8_mbsplits[part_idx], 
669 
firstidx = vp8_mbfirstidx[part_idx]; 
670 
mb>partitioning = part_idx; 
671  
672 
for (n = 0; n < num; n++) { 
673 
int k = firstidx[n];

674 
uint32_t left, above; 
675 
const uint8_t *submv_prob;

676  
677 
if (!(k & 3)) 
678 
left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);

679 
else

680 
left = AV_RN32A(&cur_mv[mbsplits_cur[k  1]]);

681 
if (k <= 3) 
682 
above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);

683 
else

684 
above = AV_RN32A(&cur_mv[mbsplits_cur[k  4]]);

685  
686 
submv_prob = get_submv_prob(left, above); 
687  
688 
if (vp56_rac_get_prob_branchy(c, submv_prob[0])) { 
689 
if (vp56_rac_get_prob_branchy(c, submv_prob[1])) { 
690 
if (vp56_rac_get_prob_branchy(c, submv_prob[2])) { 
691 
mb>bmv[n].y = mb>mv.y + read_mv_component(c, s>prob>mvc[0]);

692 
mb>bmv[n].x = mb>mv.x + read_mv_component(c, s>prob>mvc[1]);

693 
} else {

694 
AV_ZERO32(&mb>bmv[n]); 
695 
} 
696 
} else {

697 
AV_WN32A(&mb>bmv[n], above); 
698 
} 
699 
} else {

700 
AV_WN32A(&mb>bmv[n], left); 
701 
} 
702 
} 
703  
704 
return num;

705 
} 
706  
707 
static av_always_inline

708 
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c,

709 
int mb_x, int keyframe) 
710 
{ 
711 
uint8_t *intra4x4 = s>intra4x4_pred_mode_mb; 
712 
if (keyframe) {

713 
int x, y;

714 
uint8_t* const top = s>intra4x4_pred_mode_top + 4 * mb_x; 
715 
uint8_t* const left = s>intra4x4_pred_mode_left;

716 
for (y = 0; y < 4; y++) { 
717 
for (x = 0; x < 4; x++) { 
718 
const uint8_t *ctx;

719 
ctx = vp8_pred4x4_prob_intra[top[x]][left[y]]; 
720 
*intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); 
721 
left[y] = top[x] = *intra4x4; 
722 
intra4x4++; 
723 
} 
724 
} 
725 
} else {

726 
int i;

727 
for (i = 0; i < 16; i++) 
728 
intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter); 
729 
} 
730 
} 
731  
732 
static av_always_inline

733 
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment) 
734 
{ 
735 
VP56RangeCoder *c = &s>c; 
736  
737 
if (s>segmentation.update_map)

738 
*segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s>prob>segmentid); 
739 
s>segment = *segment; 
740  
741 
mb>skip = s>mbskip_enabled ? vp56_rac_get_prob(c, s>prob>mbskip) : 0;

742  
743 
if (s>keyframe) {

744 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); 
745  
746 
if (mb>mode == MODE_I4x4) {

747 
decode_intra4x4_modes(s, c, mb_x, 1);

748 
} else {

749 
const uint32_t modes = vp8_pred4x4_mode[mb>mode] * 0x01010101u; 
750 
AV_WN32A(s>intra4x4_pred_mode_top + 4 * mb_x, modes);

751 
AV_WN32A(s>intra4x4_pred_mode_left, modes); 
752 
} 
753  
754 
s>chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); 
755 
mb>ref_frame = VP56_FRAME_CURRENT; 
756 
} else if (vp56_rac_get_prob_branchy(c, s>prob>intra)) { 
757 
VP56mv near[2], best;

758 
uint8_t cnt[4] = { 0 }; 
759  
760 
// inter MB, 16.2

761 
if (vp56_rac_get_prob_branchy(c, s>prob>last))

762 
mb>ref_frame = vp56_rac_get_prob(c, s>prob>golden) ? 
763 
VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;

764 
else

765 
mb>ref_frame = VP56_FRAME_PREVIOUS; 
766 
s>ref_count[mb>ref_frame1]++;

767  
768 
// motion vectors, 16.3

769 
find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); 
770 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) { 
771 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) { 
772 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) { 
773 
if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[3]][3])) { 
774 
mb>mode = VP8_MVMODE_SPLIT; 
775 
clamp_mv(s, &mb>mv, &mb>mv, mb_x, mb_y); 
776 
mb>mv = mb>bmv[decode_splitmvs(s, c, mb)  1];

777 
} else {

778 
mb>mode = VP8_MVMODE_NEW; 
779 
clamp_mv(s, &mb>mv, &mb>mv, mb_x, mb_y); 
780 
mb>mv.y += + read_mv_component(c, s>prob>mvc[0]);

781 
mb>mv.x += + read_mv_component(c, s>prob>mvc[1]);

782 
} 
783 
} else {

784 
mb>mode = VP8_MVMODE_NEAR; 
785 
clamp_mv(s, &mb>mv, &near[1], mb_x, mb_y);

786 
} 
787 
} else {

788 
mb>mode = VP8_MVMODE_NEAREST; 
789 
clamp_mv(s, &mb>mv, &near[0], mb_x, mb_y);

790 
} 
791 
} else {

792 
mb>mode = VP8_MVMODE_ZERO; 
793 
AV_ZERO32(&mb>mv); 
794 
} 
795 
if (mb>mode != VP8_MVMODE_SPLIT) {

796 
mb>partitioning = VP8_SPLITMVMODE_NONE; 
797 
mb>bmv[0] = mb>mv;

798 
} 
799 
} else {

800 
// intra MB, 16.1

801 
mb>mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s>prob>pred16x16); 
802  
803 
if (mb>mode == MODE_I4x4)

804 
decode_intra4x4_modes(s, c, mb_x, 0);

805  
806 
s>chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s>prob>pred8x8c); 
807 
mb>ref_frame = VP56_FRAME_CURRENT; 
808 
mb>partitioning = VP8_SPLITMVMODE_NONE; 
809 
AV_ZERO32(&mb>bmv[0]);

810 
} 
811 
} 
812  
813 
/**

814 
* @param c arithmetic bitstream reader context

815 
* @param block destination for block coefficients

816 
* @param probs probabilities to use when reading trees from the bitstream

817 
* @param i initial coeff index, 0 unless a separate DC block is coded

818 
* @param zero_nhood the initial prediction context for number of surrounding

819 
* allzero blocks (only left/top, so 02)

820 
* @param qmul array holding the dc/ac dequant factor at position 0/1

821 
* @return 0 if no coeffs were decoded

822 
* otherwise, the index of the last coeff decoded plus one

823 
*/

824 
static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 
825 
uint8_t probs[8][3][NUM_DCT_TOKENS1], 
826 
int i, int zero_nhood, int16_t qmul[2]) 
827 
{ 
828 
uint8_t *token_prob = probs[i][zero_nhood]; 
829 
int nonzero = 0; 
830 
int coeff;

831  
832 
do {

833 
if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 
834 
return nonzero;

835  
836 
skip_eob:

837 
if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 
838 
if (++i == 16) 
839 
return nonzero; // invalid input; blocks should end with EOB 
840 
token_prob = probs[i][0];

841 
goto skip_eob;

842 
} 
843  
844 
if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 
845 
coeff = 1;

846 
token_prob = probs[i+1][1]; 
847 
} else {

848 
if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 
849 
coeff = vp56_rac_get_prob_branchy(c, token_prob[4]);

850 
if (coeff)

851 
coeff += vp56_rac_get_prob(c, token_prob[5]);

852 
coeff += 2;

853 
} else {

854 
// DCT_CAT*

855 
if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { 
856 
if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 
857 
coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); 
858 
} else { // DCT_CAT2 
859 
coeff = 7;

860 
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; 
861 
coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);

862 
} 
863 
} else { // DCT_CAT3 and up 
864 
int a = vp56_rac_get_prob(c, token_prob[8]); 
865 
int b = vp56_rac_get_prob(c, token_prob[9+a]); 
866 
int cat = (a<<1) + b; 
867 
coeff = 3 + (8<<cat); 
868 
coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); 
869 
} 
870 
} 
871 
token_prob = probs[i+1][2]; 
872 
} 
873  
874 
// todo: full [16] qmat? load into register?

875 
block[zigzag_scan[i]] = (vp8_rac_get(c) ? coeff : coeff) * qmul[!!i]; 
876 
nonzero = ++i; 
877 
} while (i < 16); 
878  
879 
return nonzero;

880 
} 
881  
882 
static av_always_inline

883 
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,

884 
uint8_t t_nnz[9], uint8_t l_nnz[9]) 
885 
{ 
886 
int i, x, y, luma_start = 0, luma_ctx = 3; 
887 
int nnz_pred, nnz, nnz_total = 0; 
888 
int segment = s>segment;

889 
int block_dc = 0; 
890  
891 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

892 
nnz_pred = t_nnz[8] + l_nnz[8]; 
893  
894 
// decode DC values and do hadamard

895 
nnz = decode_block_coeffs(c, s>block_dc, s>prob>token[1], 0, nnz_pred, 
896 
s>qmat[segment].luma_dc_qmul); 
897 
l_nnz[8] = t_nnz[8] = !!nnz; 
898 
if (nnz) {

899 
nnz_total += nnz; 
900 
block_dc = 1;

901 
if (nnz == 1) 
902 
s>vp8dsp.vp8_luma_dc_wht_dc(s>block, s>block_dc); 
903 
else

904 
s>vp8dsp.vp8_luma_dc_wht(s>block, s>block_dc); 
905 
} 
906 
luma_start = 1;

907 
luma_ctx = 0;

908 
} 
909  
910 
// luma blocks

911 
for (y = 0; y < 4; y++) 
912 
for (x = 0; x < 4; x++) { 
913 
nnz_pred = l_nnz[y] + t_nnz[x]; 
914 
nnz = decode_block_coeffs(c, s>block[y][x], s>prob>token[luma_ctx], luma_start, 
915 
nnz_pred, s>qmat[segment].luma_qmul); 
916 
// nnz+block_dc may be one more than the actual last index, but we don't care

917 
s>non_zero_count_cache[y][x] = nnz + block_dc; 
918 
t_nnz[x] = l_nnz[y] = !!nnz; 
919 
nnz_total += nnz; 
920 
} 
921  
922 
// chroma blocks

923 
// TODO: what to do about dimensions? 2nd dim for luma is x,

924 
// but for chroma it's (y<<1)x

925 
for (i = 4; i < 6; i++) 
926 
for (y = 0; y < 2; y++) 
927 
for (x = 0; x < 2; x++) { 
928 
nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; 
929 
nnz = decode_block_coeffs(c, s>block[i][(y<<1)+x], s>prob>token[2], 0, 
930 
nnz_pred, s>qmat[segment].chroma_qmul); 
931 
s>non_zero_count_cache[i][(y<<1)+x] = nnz;

932 
t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; 
933 
nnz_total += nnz; 
934 
} 
935  
936 
// if there were no coded coeffs despite the macroblock not being marked skip,

937 
// we MUST not do the inner loop filter and should not do IDCT

938 
// Since skip isn't used for bitstream prediction, just manually set it.

939 
if (!nnz_total)

940 
mb>skip = 1;

941 
} 
942  
943 
static av_always_inline

944 
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

945 
int linesize, int uvlinesize, int simple) 
946 
{ 
947 
AV_COPY128(top_border, src_y + 15*linesize);

948 
if (!simple) {

949 
AV_COPY64(top_border+16, src_cb + 7*uvlinesize); 
950 
AV_COPY64(top_border+24, src_cr + 7*uvlinesize); 
951 
} 
952 
} 
953  
954 
static av_always_inline

955 
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

956 
int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, 
957 
int simple, int xchg) 
958 
{ 
959 
uint8_t *top_border_m1 = top_border32; // for TL prediction 
960 
src_y = linesize; 
961 
src_cb = uvlinesize; 
962 
src_cr = uvlinesize; 
963  
964 
#define XCHG(a,b,xchg) do { \ 
965 
if (xchg) AV_SWAP64(b,a); \

966 
else AV_COPY64(b,a); \

967 
} while (0) 
968  
969 
XCHG(top_border_m1+8, src_y8, xchg); 
970 
XCHG(top_border, src_y, xchg); 
971 
XCHG(top_border+8, src_y+8, 1); 
972 
if (mb_x < mb_width1) 
973 
XCHG(top_border+32, src_y+16, 1); 
974  
975 
// only copy chroma for normal loop filter

976 
// or to initialize the top row to 127

977 
if (!simple  !mb_y) {

978 
XCHG(top_border_m1+16, src_cb8, xchg); 
979 
XCHG(top_border_m1+24, src_cr8, xchg); 
980 
XCHG(top_border+16, src_cb, 1); 
981 
XCHG(top_border+24, src_cr, 1); 
982 
} 
983 
} 
984  
985 
static av_always_inline

986 
int check_intra_pred_mode(int mode, int mb_x, int mb_y) 
987 
{ 
988 
if (mode == DC_PRED8x8) {

989 
if (!mb_x) {

990 
mode = mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8; 
991 
} else if (!mb_y) { 
992 
mode = LEFT_DC_PRED8x8; 
993 
} 
994 
} 
995 
return mode;

996 
} 
997  
998 
static av_always_inline

999 
void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
1000 
int mb_x, int mb_y) 
1001 
{ 
1002 
int x, y, mode, nnz, tr;

1003  
1004 
// for the first row, we need to run xchg_mb_border to init the top edge to 127

1005 
// otherwise, skip it if we aren't going to deblock

1006 
if (s>deblock_filter  !mb_y)

1007 
xchg_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], 
1008 
s>linesize, s>uvlinesize, mb_x, mb_y, s>mb_width, 
1009 
s>filter.simple, 1);

1010  
1011 
if (mb>mode < MODE_I4x4) {

1012 
mode = check_intra_pred_mode(mb>mode, mb_x, mb_y); 
1013 
s>hpc.pred16x16[mode](dst[0], s>linesize);

1014 
} else {

1015 
uint8_t *ptr = dst[0];

1016 
uint8_t *intra4x4 = s>intra4x4_pred_mode_mb; 
1017  
1018 
// all blocks on the right edge of the macroblock use bottom edge

1019 
// the top macroblock for their topright edge

1020 
uint8_t *tr_right = ptr  s>linesize + 16;

1021  
1022 
// if we're on the right edge of the frame, said edge is extended

1023 
// from the top macroblock

1024 
if (mb_x == s>mb_width1) { 
1025 
tr = tr_right[1]*0x01010101; 
1026 
tr_right = (uint8_t *)&tr; 
1027 
} 
1028  
1029 
if (mb>skip)

1030 
AV_ZERO128(s>non_zero_count_cache); 
1031  
1032 
for (y = 0; y < 4; y++) { 
1033 
uint8_t *topright = ptr + 4  s>linesize;

1034 
for (x = 0; x < 4; x++) { 
1035 
if (x == 3) 
1036 
topright = tr_right; 
1037  
1038 
s>hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s>linesize);

1039  
1040 
nnz = s>non_zero_count_cache[y][x]; 
1041 
if (nnz) {

1042 
if (nnz == 1) 
1043 
s>vp8dsp.vp8_idct_dc_add(ptr+4*x, s>block[y][x], s>linesize);

1044 
else

1045 
s>vp8dsp.vp8_idct_add(ptr+4*x, s>block[y][x], s>linesize);

1046 
} 
1047 
topright += 4;

1048 
} 
1049  
1050 
ptr += 4*s>linesize;

1051 
intra4x4 += 4;

1052 
} 
1053 
} 
1054  
1055 
mode = check_intra_pred_mode(s>chroma_pred_mode, mb_x, mb_y); 
1056 
s>hpc.pred8x8[mode](dst[1], s>uvlinesize);

1057 
s>hpc.pred8x8[mode](dst[2], s>uvlinesize);

1058  
1059 
if (s>deblock_filter  !mb_y)

1060 
xchg_mb_border(s>top_border[mb_x+1], dst[0], dst[1], dst[2], 
1061 
s>linesize, s>uvlinesize, mb_x, mb_y, s>mb_width, 
1062 
s>filter.simple, 0);

1063 
} 
1064  
1065 
/**

1066 
* Generic MC function.

1067 
*

1068 
* @param s VP8 decoding context

1069 
* @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes

1070 
* @param dst target buffer for block data at block position

1071 
* @param src reference picture buffer at origin (0, 0)

1072 
* @param mv motion vector (relative to block position) to get pixel data from

1073 
* @param x_off horizontal position of block from origin (0, 0)

1074 
* @param y_off vertical position of block from origin (0, 0)

1075 
* @param block_w width of block (16, 8 or 4)

1076 
* @param block_h height of block (always same as block_w)

1077 
* @param width width of src/dst plane data

1078 
* @param height height of src/dst plane data

1079 
* @param linesize size of a single line of plane data, including padding

1080 
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)

1081 
*/

1082 
static av_always_inline

1083 
void vp8_mc(VP8Context *s, int luma, 
1084 
uint8_t *dst, uint8_t *src, const VP56mv *mv,

1085 
int x_off, int y_off, int block_w, int block_h, 
1086 
int width, int height, int linesize, 
1087 
vp8_mc_func mc_func[3][3]) 
1088 
{ 
1089 
if (AV_RN32A(mv)) {

1090 
static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; 
1091 
int mx = (mv>x << luma)&7, mx_idx = idx[mx]; 
1092 
int my = (mv>y << luma)&7, my_idx = idx[my]; 
1093  
1094 
x_off += mv>x >> (3  luma);

1095 
y_off += mv>y >> (3  luma);

1096  
1097 
// edge emulation

1098 
src += y_off * linesize + x_off; 
1099 
if (x_off < 2  x_off >= width  block_w  3  
1100 
y_off < 2  y_off >= height  block_h  3) { 
1101 
ff_emulated_edge_mc(s>edge_emu_buffer, src  2 * linesize  2, linesize, 
1102 
block_w + 5, block_h + 5, 
1103 
x_off  2, y_off  2, width, height); 
1104 
src = s>edge_emu_buffer + 2 + linesize * 2; 
1105 
} 
1106 
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 
1107 
} else

1108 
mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); 
1109 
} 
1110  
1111 
static av_always_inline

1112 
void vp8_mc_part(VP8Context *s, uint8_t *dst[3], 
1113 
AVFrame *ref_frame, int x_off, int y_off, 
1114 
int bx_off, int by_off, 
1115 
int block_w, int block_h, 
1116 
int width, int height, VP56mv *mv) 
1117 
{ 
1118 
VP56mv uvmv = *mv; 
1119  
1120 
/* Y */

1121 
vp8_mc(s, 1, dst[0] + by_off * s>linesize + bx_off, 
1122 
ref_frame>data[0], mv, x_off + bx_off, y_off + by_off,

1123 
block_w, block_h, width, height, s>linesize, 
1124 
s>put_pixels_tab[block_w == 8]);

1125  
1126 
/* U/V */

1127 
if (s>profile == 3) { 
1128 
uvmv.x &= ~7;

1129 
uvmv.y &= ~7;

1130 
} 
1131 
x_off >>= 1; y_off >>= 1; 
1132 
bx_off >>= 1; by_off >>= 1; 
1133 
width >>= 1; height >>= 1; 
1134 
block_w >>= 1; block_h >>= 1; 
1135 
vp8_mc(s, 0, dst[1] + by_off * s>uvlinesize + bx_off, 
1136 
ref_frame>data[1], &uvmv, x_off + bx_off, y_off + by_off,

1137 
block_w, block_h, width, height, s>uvlinesize, 
1138 
s>put_pixels_tab[1 + (block_w == 4)]); 
1139 
vp8_mc(s, 0, dst[2] + by_off * s>uvlinesize + bx_off, 
1140 
ref_frame>data[2], &uvmv, x_off + bx_off, y_off + by_off,

1141 
block_w, block_h, width, height, s>uvlinesize, 
1142 
s>put_pixels_tab[1 + (block_w == 4)]); 
1143 
} 
1144  
1145 
/* Fetch pixels for estimated mv 4 macroblocks ahead.

1146 
* Optimized for 64byte cache lines. Inspired by ffh264 prefetch_motion. */

1147 
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) 
1148 
{ 
1149 
/* Don't prefetch refs that haven't been used very often this frame. */

1150 
if (s>ref_count[ref1] > (mb_xy >> 5)) { 
1151 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1152 
int mx = mb>mv.x + x_off + 8; 
1153 
int my = mb>mv.y + y_off;

1154 
uint8_t **src= s>framep[ref]>data; 
1155 
int off= mx + (my + (mb_x&3)*4)*s>linesize + 64; 
1156 
s>dsp.prefetch(src[0]+off, s>linesize, 4); 
1157 
off= (mx>>1) + ((my>>1) + (mb_x&7))*s>uvlinesize + 64; 
1158 
s>dsp.prefetch(src[1]+off, src[2]src[1], 2); 
1159 
} 
1160 
} 
1161  
1162 
/**

1163 
* Apply motion vectors to prediction buffer, chapter 18.

1164 
*/

1165 
static av_always_inline

1166 
void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
1167 
int mb_x, int mb_y) 
1168 
{ 
1169 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1170 
int width = 16*s>mb_width, height = 16*s>mb_height; 
1171 
AVFrame *ref = s>framep[mb>ref_frame]; 
1172 
VP56mv *bmv = mb>bmv; 
1173  
1174 
if (mb>mode < VP8_MVMODE_SPLIT) {

1175 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1176 
0, 0, 16, 16, width, height, &mb>mv); 
1177 
} else switch (mb>partitioning) { 
1178 
case VP8_SPLITMVMODE_4x4: {

1179 
int x, y;

1180 
VP56mv uvmv; 
1181  
1182 
/* Y */

1183 
for (y = 0; y < 4; y++) { 
1184 
for (x = 0; x < 4; x++) { 
1185 
vp8_mc(s, 1, dst[0] + 4*y*s>linesize + x*4, 
1186 
ref>data[0], &bmv[4*y + x], 
1187 
4*x + x_off, 4*y + y_off, 4, 4, 
1188 
width, height, s>linesize, 
1189 
s>put_pixels_tab[2]);

1190 
} 
1191 
} 
1192  
1193 
/* U/V */

1194 
x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; 
1195 
for (y = 0; y < 2; y++) { 
1196 
for (x = 0; x < 2; x++) { 
1197 
uvmv.x = mb>bmv[ 2*y * 4 + 2*x ].x + 
1198 
mb>bmv[ 2*y * 4 + 2*x+1].x + 
1199 
mb>bmv[(2*y+1) * 4 + 2*x ].x + 
1200 
mb>bmv[(2*y+1) * 4 + 2*x+1].x; 
1201 
uvmv.y = mb>bmv[ 2*y * 4 + 2*x ].y + 
1202 
mb>bmv[ 2*y * 4 + 2*x+1].y + 
1203 
mb>bmv[(2*y+1) * 4 + 2*x ].y + 
1204 
mb>bmv[(2*y+1) * 4 + 2*x+1].y; 
1205 
uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT1))) >> 2; 
1206 
uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT1))) >> 2; 
1207 
if (s>profile == 3) { 
1208 
uvmv.x &= ~7;

1209 
uvmv.y &= ~7;

1210 
} 
1211 
vp8_mc(s, 0, dst[1] + 4*y*s>uvlinesize + x*4, 
1212 
ref>data[1], &uvmv,

1213 
4*x + x_off, 4*y + y_off, 4, 4, 
1214 
width, height, s>uvlinesize, 
1215 
s>put_pixels_tab[2]);

1216 
vp8_mc(s, 0, dst[2] + 4*y*s>uvlinesize + x*4, 
1217 
ref>data[2], &uvmv,

1218 
4*x + x_off, 4*y + y_off, 4, 4, 
1219 
width, height, s>uvlinesize, 
1220 
s>put_pixels_tab[2]);

1221 
} 
1222 
} 
1223 
break;

1224 
} 
1225 
case VP8_SPLITMVMODE_16x8:

1226 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1227 
0, 0, 16, 8, width, height, &bmv[0]); 
1228 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1229 
0, 8, 16, 8, width, height, &bmv[1]); 
1230 
break;

1231 
case VP8_SPLITMVMODE_8x16:

1232 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1233 
0, 0, 8, 16, width, height, &bmv[0]); 
1234 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1235 
8, 0, 8, 16, width, height, &bmv[1]); 
1236 
break;

1237 
case VP8_SPLITMVMODE_8x8:

1238 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1239 
0, 0, 8, 8, width, height, &bmv[0]); 
1240 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1241 
8, 0, 8, 8, width, height, &bmv[1]); 
1242 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1243 
0, 8, 8, 8, width, height, &bmv[2]); 
1244 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1245 
8, 8, 8, 8, width, height, &bmv[3]); 
1246 
break;

1247 
} 
1248 
} 
1249  
1250 
/* Add the inverse-transformed residual of a non-skipped macroblock to the
 * prediction in dst. The per-4x4 nnz cache selects between the DC-only and
 * full IDCT; a row/plane whose four nnz values are all 0 or 1 takes the
 * batched DC-add fast path. */
static av_always_inline void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    // for MODE_I4x4 the luma residual was already added during prediction
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4 & ~0x01010101) {
                    // at least one block has AC coefficients: per-block dispatch
                    for (x = 0; x < 4; x++) {
                        int nnz = s->non_zero_count_cache[y][x];
                        if (nnz) {
                            if (nnz == 1)
                                s->vp8dsp.vp8_idct_dc_add(y_dst + 4 * x, s->block[y][x], s->linesize);
                            else
                                s->vp8dsp.vp8_idct_add(y_dst + 4 * x, s->block[y][x], s->linesize);
                        }
                    }
                } else {
                    // DC-only row: one call handles all four blocks
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4 * s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4 + ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1 + ch];
            if (nnz4 & ~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        int nnz = s->non_zero_count_cache[4 + ch][(y << 1) + x];
                        if (nnz) {
                            if (nnz == 1)
                                s->vp8dsp.vp8_idct_dc_add(ch_dst + 4 * x, s->block[4 + ch][(y << 1) + x], s->uvlinesize);
                            else
                                s->vp8dsp.vp8_idct_add(ch_dst + 4 * x, s->block[4 + ch][(y << 1) + x], s->uvlinesize);
                        }
                    }
                    ch_dst += 4 * s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4 + ch], s->uvlinesize);
            }
        }
    }
}
1300  
1301 
/* Compute the loop-filter strength parameters for one macroblock
 * (base level, segment and per-mode/ref deltas, sharpness clamp). */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;   // segment value is a delta
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];

        if (mb->ref_frame == VP56_FRAME_CURRENT) {
            if (mb->mode == MODE_I4x4)
                filter_level += s->lf_delta.mode[0];
        } else {
            if (mb->mode == VP8_MVMODE_ZERO)
                filter_level += s->lf_delta.mode[1];
            else if (mb->mode == VP8_MVMODE_SPLIT)
                filter_level += s->lf_delta.mode[3];
            else
                filter_level += s->lf_delta.mode[2];
        }
    }
    filter_level = av_clip(filter_level, 0, 63);

    // sharpness reduces the interior limit, but never below 1
    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= s->filter.sharpness > 4 ? 2 : 1;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    // inner edges are filtered whenever the MB has residual or sub-block structure
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1340  
1341 
/* Run the normal (non-simple) loop filter on one macroblock: the left/top
 * macroblock edges plus the three inner luma edges and one inner chroma edge
 * in each direction. */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;

    if (!filter_level)
        return;

    mbedge_lim = 2 * (filter_level + 2) + inner_limit;
    bedge_lim  = 2 *  filter_level      + inner_limit;
    hev_thresh = filter_level >= 15;

    // high-edge-variance threshold depends on frame type and level
    if (s->keyframe) {
        if (filter_level >= 40)
            hev_thresh = 2;
    } else {
        if (filter_level >= 40)
            hev_thresh = 3;
        else if (filter_level >= 20)
            hev_thresh = 2;
    }

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] +  8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  4 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] +  8 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + 12 * linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1409  
1410 
/* Run the simple loop filter on one macroblock: luma only, no
 * high-edge-variance logic. */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    if (!filter_level)
        return;

    mbedge_lim = 2 * (filter_level + 2) + inner_limit;
    bedge_lim  = 2 *  filter_level      + inner_limit;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst +  8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst + 12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  4 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst +  8 * linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst + 12 * linesize, linesize, bedge_lim);
    }
}
1440  
1441 
/* Loop-filter one macroblock row with the normal filter, saving each MB's
 * bottom border first so the next row's intra prediction stays correct. */
static void filter_mb_row(VP8Context *s, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    int mb_x;
    uint8_t *dst[3] = {
        s->framep[VP56_FRAME_CURRENT]->data[0] + 16 * mb_y * s->linesize,
        s->framep[VP56_FRAME_CURRENT]->data[1] +  8 * mb_y * s->uvlinesize,
        s->framep[VP56_FRAME_CURRENT]->data[2] +  8 * mb_y * s->uvlinesize
    };

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x + 1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, f++, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}
1459  
1460 
/* Loop-filter one macroblock row with the simple (luma-only) filter. */
static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16 * mb_y * s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x + 1], dst, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, dst, f++, mb_x, mb_y);
        dst += 16;
    }
}
1472  
1473 
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 
1474 
AVPacket *avpkt) 
1475 
{ 
1476 
VP8Context *s = avctx>priv_data; 
1477 
int ret, mb_x, mb_y, i, y, referenced;

1478 
enum AVDiscard skip_thresh;

1479 
AVFrame *av_uninit(curframe); 
1480  
1481 
if ((ret = decode_frame_header(s, avpkt>data, avpkt>size)) < 0) 
1482 
return ret;

1483  
1484 
referenced = s>update_last  s>update_golden == VP56_FRAME_CURRENT 
1485 
 s>update_altref == VP56_FRAME_CURRENT; 
1486  
1487 
skip_thresh = !referenced ? AVDISCARD_NONREF : 
1488 
!s>keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; 
1489  
1490 
if (avctx>skip_frame >= skip_thresh) {

1491 
s>invisible = 1;

1492 
goto skip_decode;

1493 
} 
1494 
s>deblock_filter = s>filter.level && avctx>skip_loop_filter < skip_thresh; 
1495  
1496 
for (i = 0; i < 4; i++) 
1497 
if (&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] &&

1498 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1499 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) { 
1500 
curframe = s>framep[VP56_FRAME_CURRENT] = &s>frames[i]; 
1501 
break;

1502 
} 
1503 
if (curframe>data[0]) 
1504 
avctx>release_buffer(avctx, curframe); 
1505  
1506 
curframe>key_frame = s>keyframe; 
1507 
curframe>pict_type = s>keyframe ? FF_I_TYPE : FF_P_TYPE; 
1508 
curframe>reference = referenced ? 3 : 0; 
1509 
if ((ret = avctx>get_buffer(avctx, curframe))) {

1510 
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");

1511 
return ret;

1512 
} 
1513  
1514 
// Given that arithmetic probabilities are updated every frame, it's quite likely

1515 
// that the values we have on a random interframe are complete junk if we didn't

1516 
// start decode on a keyframe. So just don't display anything rather than junk.

1517 
if (!s>keyframe && (!s>framep[VP56_FRAME_PREVIOUS] 

1518 
!s>framep[VP56_FRAME_GOLDEN]  
1519 
!s>framep[VP56_FRAME_GOLDEN2])) { 
1520 
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");

1521 
return AVERROR_INVALIDDATA;

1522 
} 
1523  
1524 
s>linesize = curframe>linesize[0];

1525 
s>uvlinesize = curframe>linesize[1];

1526  
1527 
if (!s>edge_emu_buffer)

1528 
s>edge_emu_buffer = av_malloc(21*s>linesize);

1529  
1530 
memset(s>top_nnz, 0, s>mb_width*sizeof(*s>top_nnz)); 
1531  
1532 
/* Zero macroblock structures for top/left prediction from outside the frame. */

1533 
memset(s>macroblocks, 0, (s>mb_width + s>mb_height*2)*sizeof(*s>macroblocks)); 
1534  
1535 
// top edge of 127 for intra prediction

1536 
memset(s>top_border, 127, (s>mb_width+1)*sizeof(*s>top_border)); 
1537 
memset(s>ref_count, 0, sizeof(s>ref_count)); 
1538 
if (s>keyframe)

1539 
memset(s>intra4x4_pred_mode_top, DC_PRED, s>b4_stride*4);

1540  
1541 
for (mb_y = 0; mb_y < s>mb_height; mb_y++) { 
1542 
VP56RangeCoder *c = &s>coeff_partition[mb_y & (s>num_coeff_partitions1)];

1543 
VP8Macroblock *mb = s>macroblocks + (s>mb_height  mb_y  1)*2; 
1544 
uint8_t *segment_map = s>segmentation_map + mb_y*s>mb_stride; 
1545 
int mb_xy = mb_y * s>mb_stride;

1546 
uint8_t *dst[3] = {

1547 
curframe>data[0] + 16*mb_y*s>linesize, 
1548 
curframe>data[1] + 8*mb_y*s>uvlinesize, 
1549 
curframe>data[2] + 8*mb_y*s>uvlinesize 
1550 
}; 
1551  
1552 
memset(s>left_nnz, 0, sizeof(s>left_nnz)); 
1553 
AV_WN32A(s>intra4x4_pred_mode_left, DC_PRED*0x01010101);

1554  
1555 
// left edge of 129 for intra prediction

1556 
if (!(avctx>flags & CODEC_FLAG_EMU_EDGE))

1557 
for (i = 0; i < 3; i++) 
1558 
for (y = 0; y < 16>>!!i; y++) 
1559 
dst[i][y*curframe>linesize[i]1] = 129; 
1560 
if (mb_y)

1561 
memset(s>top_border, 129, sizeof(*s>top_border)); 
1562  
1563 
for (mb_x = 0; mb_x < s>mb_width; mb_x++, mb_xy++, mb++) { 
1564 
uint8_t *segment_mb = segment_map+mb_x; 
1565  
1566 
/* Prefetch the current frame, 4 MBs ahead */

1567 
s>dsp.prefetch(dst[0] + (mb_x&3)*4*s>linesize + 64, s>linesize, 4); 
1568 
s>dsp.prefetch(dst[1] + (mb_x&7)*s>uvlinesize + 64, dst[2]  dst[1], 2); 
1569  
1570 
decode_mb_mode(s, mb, mb_x, mb_y, segment_mb); 
1571  
1572 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 
1573  
1574 
if (!mb>skip)

1575 
decode_mb_coeffs(s, c, mb, s>top_nnz[mb_x], s>left_nnz); 
1576  
1577 
if (mb>mode <= MODE_I4x4)

1578 
intra_predict(s, dst, mb, mb_x, mb_y); 
1579 
else

1580 
inter_predict(s, dst, mb, mb_x, mb_y); 
1581  
1582 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 
1583  
1584 
if (!mb>skip) {

1585 
idct_mb(s, dst, mb); 
1586 
} else {

1587 
AV_ZERO64(s>left_nnz); 
1588 
AV_WN64(s>top_nnz[mb_x], 0); // array of 9, so unaligned 
1589  
1590 
// Reset DC block predictors if they would exist if the mb had coefficients

1591 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

1592 
s>left_nnz[8] = 0; 
1593 
s>top_nnz[mb_x][8] = 0; 
1594 
} 
1595 
} 
1596  
1597 
if (s>deblock_filter)

1598 
filter_level_for_mb(s, mb, &s>filter_strength[mb_x]); 
1599  
1600 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); 
1601  
1602 
dst[0] += 16; 
1603 
dst[1] += 8; 
1604 
dst[2] += 8; 
1605 
} 
1606 
if (s>deblock_filter) {

1607 
if (s>filter.simple)

1608 
filter_mb_row_simple(s, mb_y); 
1609 
else

1610 
filter_mb_row(s, mb_y); 
1611 
} 
1612 
} 
1613  
1614 
skip_decode:

1615 
// if future frames don't use the updated probabilities,

1616 
// reset them to the values we saved

1617 
if (!s>update_probabilities)

1618 
s>prob[0] = s>prob[1]; 
1619  
1620 
// check if golden and altref are swapped

1621 
if (s>update_altref == VP56_FRAME_GOLDEN &&

1622 
s>update_golden == VP56_FRAME_GOLDEN2) 
1623 
FFSWAP(AVFrame *, s>framep[VP56_FRAME_GOLDEN], s>framep[VP56_FRAME_GOLDEN2]); 
1624 
else {

1625 
if (s>update_altref != VP56_FRAME_NONE)

1626 
s>framep[VP56_FRAME_GOLDEN2] = s>framep[s>update_altref]; 
1627  
1628 
if (s>update_golden != VP56_FRAME_NONE)

1629 
s>framep[VP56_FRAME_GOLDEN] = s>framep[s>update_golden]; 
1630 
} 
1631  
1632 
if (s>update_last) // move cur>prev 
1633 
s>framep[VP56_FRAME_PREVIOUS] = s>framep[VP56_FRAME_CURRENT]; 
1634  
1635 
// release no longer referenced frames

1636 
for (i = 0; i < 4; i++) 
1637 
if (s>frames[i].data[0] && 
1638 
&s>frames[i] != s>framep[VP56_FRAME_CURRENT] && 
1639 
&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] && 
1640 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1641 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) 
1642 
avctx>release_buffer(avctx, &s>frames[i]); 
1643  
1644 
if (!s>invisible) {

1645 
*(AVFrame*)data = *s>framep[VP56_FRAME_CURRENT]; 
1646 
*data_size = sizeof(AVFrame);

1647 
} 
1648  
1649 
return avpkt>size;

1650 
} 
1651  
1652 
/* One-time decoder initialization: pixel format, DSP/prediction tables. */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    // intra pred needs edge emulation among other things
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n");
        return AVERROR_PATCHWELCOME;
    }

    s->avctx = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}
1671  
1672 
/* Codec close callback: all buffer/state teardown is shared with flush. */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);
    return 0;
}
1677  
1678 
AVCodec vp8_decoder = { 
1679 
"vp8",

1680 
AVMEDIA_TYPE_VIDEO, 
1681 
CODEC_ID_VP8, 
1682 
sizeof(VP8Context),

1683 
vp8_decode_init, 
1684 
NULL,

1685 
vp8_decode_free, 
1686 
vp8_decode_frame, 
1687 
CODEC_CAP_DR1, 
1688 
.flush = vp8_decode_flush, 
1689 
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),

1690 
}; 