ffmpeg / libavcodec / vp8.c @ 3ae079a3
History  View  Annotate  Download (56.6 KB)
1 
/**


2 
* VP8 compatible video decoder

3 
*

4 
* Copyright (C) 2010 David Conrad

5 
* Copyright (C) 2010 Ronald S. Bultje

6 
*

7 
* This file is part of FFmpeg.

8 
*

9 
* FFmpeg is free software; you can redistribute it and/or

10 
* modify it under the terms of the GNU Lesser General Public

11 
* License as published by the Free Software Foundation; either

12 
* version 2.1 of the License, or (at your option) any later version.

13 
*

14 
* FFmpeg is distributed in the hope that it will be useful,

15 
* but WITHOUT ANY WARRANTY; without even the implied warranty of

16 
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

17 
* Lesser General Public License for more details.

18 
*

19 
* You should have received a copy of the GNU Lesser General Public

20 
* License along with FFmpeg; if not, write to the Free Software

21 
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

22 
*/

23  
24 
#include "avcodec.h" 
25 
#include "vp56.h" 
26 
#include "vp8data.h" 
27 
#include "vp8dsp.h" 
28 
#include "h264pred.h" 
29 
#include "rectangle.h" 
30  
31 
/**
 * Loop filter strength values.
 * An array of these is kept in VP8Context.filter_strength.
 */
typedef struct {
    uint8_t filter_level;
    uint8_t inner_limit;
    uint8_t inner_filter;
} VP8FilterStrength;
36  
37 
typedef struct { 
38 
uint8_t skip; 
39 
// todo: make it possible to check for at least (i4x4 or split_mv)

40 
// in one op. are others needed?

41 
uint8_t mode; 
42 
uint8_t ref_frame; 
43 
uint8_t partitioning; 
44 
VP56mv mv; 
45 
VP56mv bmv[16];

46 
} VP8Macroblock; 
47  
48 
typedef struct { 
49 
AVCodecContext *avctx; 
50 
DSPContext dsp; 
51 
VP8DSPContext vp8dsp; 
52 
H264PredContext hpc; 
53 
vp8_mc_func put_pixels_tab[3][3][3]; 
54 
AVFrame frames[4];

55 
AVFrame *framep[4];

56 
uint8_t *edge_emu_buffer; 
57 
VP56RangeCoder c; ///< header context, includes mb modes and motion vectors

58 
int profile;

59  
60 
int mb_width; /* number of horizontal MB */ 
61 
int mb_height; /* number of vertical MB */ 
62 
int linesize;

63 
int uvlinesize;

64  
65 
int keyframe;

66 
int invisible;

67 
int update_last; ///< update VP56_FRAME_PREVIOUS with the current one 
68 
int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so 
69 
int update_altref;

70 
int deblock_filter;

71  
72 
/**

73 
* If this flag is not set, all the probability updates

74 
* are discarded after this frame is decoded.

75 
*/

76 
int update_probabilities;

77  
78 
/**

79 
* All coefficients are contained in separate arith coding contexts.

80 
* There can be 1, 2, 4, or 8 of these after the header context.

81 
*/

82 
int num_coeff_partitions;

83 
VP56RangeCoder coeff_partition[8];

84  
85 
VP8Macroblock *macroblocks; 
86 
VP8Macroblock *macroblocks_base; 
87 
VP8FilterStrength *filter_strength; 
88 
int mb_stride;

89  
90 
uint8_t *intra4x4_pred_mode; 
91 
uint8_t *intra4x4_pred_mode_base; 
92 
uint8_t *segmentation_map; 
93 
int b4_stride;

94  
95 
/**

96 
* Cache of the top row needed for intra prediction

97 
* 16 for luma, 8 for each chroma plane

98 
*/

99 
uint8_t (*top_border)[16+8+8]; 
100  
101 
/**

102 
* For coeff decode, we need to know whether the above block had nonzero

103 
* coefficients. This means for each macroblock, we need data for 4 luma

104 
* blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9

105 
* per macroblock. We keep the last row in top_nnz.

106 
*/

107 
uint8_t (*top_nnz)[9];

108 
DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; 
109  
110 
/**

111 
* This is the index plus one of the last nonzero coeff

112 
* for each of the blocks in the current macroblock.

113 
* So, 0 > no coeffs

114 
* 1 > dconly (special transform)

115 
* 2+> full transform

116 
*/

117 
DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; 
118 
DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; 
119 
uint8_t intra4x4_pred_mode_mb[16];

120  
121 
int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock 
122 
int segment; ///< segment of the current macroblock 
123  
124 
int mbskip_enabled;

125 
int sign_bias[4]; ///< one state [0, 1] per ref frame type 
126 
int ref_count[3]; 
127  
128 
/**

129 
* Base parameters for segmentation, i.e. permacroblock parameters.

130 
* These must be kept unchanged even if segmentation is not used for

131 
* a frame, since the values persist between interframes.

132 
*/

133 
struct {

134 
int enabled;

135 
int absolute_vals;

136 
int update_map;

137 
int8_t base_quant[4];

138 
int8_t filter_level[4]; ///< base loop filter level 
139 
} segmentation; 
140  
141 
/**

142 
* Macroblocks can have one of 4 different quants in a frame when

143 
* segmentation is enabled.

144 
* If segmentation is disabled, only the first segment's values are used.

145 
*/

146 
struct {

147 
// [0]  DC qmul [1]  AC qmul

148 
int16_t luma_qmul[2];

149 
int16_t luma_dc_qmul[2]; ///< luma dconly block quant 
150 
int16_t chroma_qmul[2];

151 
} qmat[4];

152  
153 
struct {

154 
int simple;

155 
int level;

156 
int sharpness;

157 
} filter; 
158  
159 
struct {

160 
int enabled; ///< whether each mb can have a different strength based on mode/ref 
161  
162 
/**

163 
* filter strength adjustment for the following macroblock modes:

164 
* [0]  i4x4

165 
* [1]  zero mv

166 
* [2]  inter modes except for zero or split mv

167 
* [3]  split mv

168 
* i16x16 modes never have any adjustment

169 
*/

170 
int8_t mode[4];

171  
172 
/**

173 
* filter strength adjustment for macroblocks that reference:

174 
* [0]  intra / VP56_FRAME_CURRENT

175 
* [1]  VP56_FRAME_PREVIOUS

176 
* [2]  VP56_FRAME_GOLDEN

177 
* [3]  altref / VP56_FRAME_GOLDEN2

178 
*/

179 
int8_t ref[4];

180 
} lf_delta; 
181  
182 
/**

183 
* These are all of the updatable probabilities for binary decisions.

184 
* They are only implictly reset on keyframes, making it quite likely

185 
* for an interframe to desync if a prior frame's header was corrupt

186 
* or missing outright!

187 
*/

188 
struct {

189 
uint8_t segmentid[3];

190 
uint8_t mbskip; 
191 
uint8_t intra; 
192 
uint8_t last; 
193 
uint8_t golden; 
194 
uint8_t pred16x16[4];

195 
uint8_t pred8x8c[3];

196 
uint8_t token[4][8][3][NUM_DCT_TOKENS1]; 
197 
uint8_t mvc[2][19]; 
198 
} prob[2];

199 
} VP8Context; 
200  
201 
/* 24-bit read: AV_RL16(p) supplies the low 16 bits and byte p[2] is placed in
 * bits 16..23. Note that p is evaluated twice. */
#define RL24(p) (AV_RL16(p) + ((p)[2] << 16)) 
202  
203 
/**
 * Release every held frame buffer and free all per-dimension tables.
 *
 * After this call all owning pointers in the context are NULL, so the
 * function may be called repeatedly and before any allocation happened.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < 4; i++)
        if (s->frames[i].data[0])
            avctx->release_buffer(avctx, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    av_freep(&s->macroblocks_base);
    // filter_strength is allocated next to the other tables in
    // update_dimensions(); it has to be released here as well or it leaks
    // on every flush / dimension change.
    av_freep(&s->filter_strength);
    av_freep(&s->intra4x4_pred_mode_base);
    av_freep(&s->top_nnz);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->top_border);
    av_freep(&s->segmentation_map);

    // these point into the *_base allocations freed above
    s->macroblocks        = NULL;
    s->intra4x4_pred_mode = NULL;
}
223  
224 
/**
 * Validate and apply new frame dimensions, then (re)allocate all tables
 * whose size depends on the macroblock count.
 *
 * @return 0 on success, AVERROR_INVALIDDATA if the dimensions are rejected,
 *         AVERROR(ENOMEM) if any allocation fails
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    int i;

    if (avcodec_check_dimensions(s->avctx, width, height))
        return AVERROR_INVALIDDATA;

    // drop frames and tables sized for the previous dimensions
    vp8_decode_flush(s->avctx);

    avcodec_set_dimensions(s->avctx, width, height);

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    // we allocate a border around the top/left of intra4x4 modes
    // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
    s->mb_stride = s->mb_width+1;
    s->b4_stride = 4*s->mb_stride;

    s->macroblocks_base        = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks));
    s->filter_strength         = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
    s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
    s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->segmentation_map        = av_mallocz(s->mb_stride*s->mb_height);

    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base ||
        !s->top_nnz || !s->top_border || !s->segmentation_map)
        return AVERROR(ENOMEM);

    // skip the left border entry / the top border row + left border column
    s->macroblocks        = s->macroblocks_base + 1;
    s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride;

    // initialise the top border row and the column left of each 4x4 row to DC_PRED
    memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride);
    for (i = 0; i < 4*s->mb_height; i++)
        s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED;

    return 0;
}
263  
264 
/**
 * Read the segmentation section of the frame header from the header
 * range coder into s->segmentation and s->prob->segmentid.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        // each probability is either coded explicitly or defaults to 255
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}
284  
285 
/**
 * Read the four per-reference and four per-mode loop filter deltas
 * from the header range coder into s->lf_delta.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++)
        s->lf_delta.ref[i]  = vp8_rac_get_sint(c, 6);

    for (i = 0; i < 4; i++)
        s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6);
}
296  
297 
/**
 * Read the coefficient partition count from the header and initialise one
 * range decoder per partition.
 *
 * buf starts with a table of 3-byte sizes for all partitions except the
 * last one; the last partition takes whatever data remains.
 *
 * @param buf      data following the first (header) partition
 * @param buf_size number of bytes available at buf
 * @return 0 on success, non-zero if the size table or a partition does not
 *         fit into buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    // skip over the size table
    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    // i == num_coeff_partitions-1 here: the last partition has no size entry
    vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}
322  
323 
/**
 * Read the base quantizer index and its five deltas from the header and
 * fill s->qmat[] with the dequantization factors for all four segments.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            // per-segment value is either absolute or a delta to the frame value
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =       vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)];
        s->qmat[i].luma_qmul[1]    =       vp8_ac_qlookup[av_clip(base_qi             , 0, 127)];
        s->qmat[i].luma_dc_qmul[0] =   2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)];
        s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100;
        s->qmat[i].chroma_qmul[0]  =       vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)];
        s->qmat[i].chroma_qmul[1]  =       vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)];

        // range limits on the scaled luma-dc AC factor and the chroma DC factor
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}
354  
355 
/**

356 
* Determine which buffers golden and altref should be updated with after this frame.

357 
* The spec isn't clear here, so I'm going by my understanding of what libvpx does

358 
*

359 
* Intra frames update all 3 references

360 
* Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set

361 
* If the update (goldenaltref) flag is set, it's updated with the current frame

362 
* if update_last is set, and VP56_FRAME_PREVIOUS otherwise.

363 
* If the flag is not set, the number read means:

364 
* 0: no update

365 
* 1: VP56_FRAME_PREVIOUS

366 
* 2: update golden with altref, or update altref with golden

367 
*/

368 
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) 
369 
{ 
370 
VP56RangeCoder *c = &s>c; 
371  
372 
if (update)

373 
return VP56_FRAME_CURRENT;

374  
375 
switch (vp8_rac_get_uint(c, 2)) { 
376 
case 1: 
377 
return VP56_FRAME_PREVIOUS;

378 
case 2: 
379 
return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;

380 
} 
381 
return VP56_FRAME_NONE;

382 
} 
383  
384 
/**
 * Read the golden/altref update flags from the header and store the
 * resulting update sources in s->update_golden and s->update_altref.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    // both flags are read before either copy selector
    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}
394  
395 
/**
 * Parse the uncompressed frame tag and the first (header) partition.
 *
 * Sets up the header and coefficient range decoders, reallocates the
 * per-dimension tables when the frame size changes, and updates all
 * header-level state (segmentation, loop filter, quantizers, reference
 * update flags and probabilities).
 *
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, a negative AVERROR code on failure
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    // 3-byte frame tag
    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    // keyframes carry 7 extra bytes (start code + dimensions) before the header partition
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        // keyframes reset the references, probabilities and segmentation
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        memcpy(s->prob->token    , vp8_token_default_probs , sizeof(s->prob->token));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        // the comparison must be applied to the assigned value, otherwise
        // ret would hold the boolean result and a failure would return 1
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    // for keyframes the flag is not read
    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l]))
                        s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8);

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}
525  
526 
/**
 * Clamp a motion vector so that it points at most MARGIN units outside
 * the frame, given the position of the macroblock it belongs to.
 * Macroblock positions are scaled by 64 (mb << 6) to match the vector units.
 *
 * @param dst  clamped result
 * @param src  vector to clamp
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src,
                            int mb_x, int mb_y)
{
#define MARGIN (16 << 2)
    dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN),
                     ((s->mb_width  - 1 - mb_x) << 6) + MARGIN);
    dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN),
                     ((s->mb_height - 1 - mb_y) << 6) + MARGIN);
}
535  
536 
/**
 * Collect the candidate motion vectors from the top, left and top-left
 * neighbours of a macroblock.
 *
 * @param mb   current macroblock; neighbours are addressed relative to it
 * @param near receives the nearest ([0]) and near ([1]) vectors
 * @param best receives the clamped best predictor (zero or nearest)
 * @param cnt  weights per candidate class (zero/nearest/near/split);
 *             must be zero-initialised by the caller since it is accumulated into
 */
static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                          VP56mv near[2], VP56mv *best, uint8_t cnt[4])
{
    VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
                                  mb - 1 /* left */,
                                  mb + 1 /* top-left */ };
    enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT };
    VP56mv near_mv[4]  = {{ 0 }};
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    int idx = CNT_ZERO;
    int best_idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int *sign_bias = s->sign_bias;

    /* Process MB on top, left and top-left.
     * Top and left neighbours weigh 2, the top-left one weighs 1. */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the two 16-bit values in mv: */\
                    /* complement, then add 1 to the low 15 bits of each */\
                    /* lane and fold the sign bit back in with xor so no */\
                    /* carry crosses between the lanes. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }
    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    /* If we have three distinct MVs, merge first and last if they're the same */
    if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1+EDGE_TOP]) == AV_RN32A(&near_mv[1+EDGE_TOPLEFT]))
        cnt[CNT_NEAREST] += 1;

    /* From here on cnt[CNT_SPLITMV] counts split-mode neighbours instead */
    cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode   == VP8_MVMODE_SPLIT) +
                        (mb_edge[EDGE_TOP]->mode    == VP8_MVMODE_SPLIT)) * 2 +
                        (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

    /* Swap near and nearest if necessary */
    if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
        FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
        FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
    }

    /* Choose the best mv out of 0,0 and the nearest mv */
    if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])
        best_idx = CNT_NEAREST;

    clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y);
    near[0] = near_mv[CNT_NEAREST];
    near[1] = near_mv[CNT_NEAR];
}
595  
596 
/**

597 
* Motion vector coding, 17.1.

598 
*/

599 
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) 
600 
{ 
601 
int x = 0; 
602  
603 
if (vp56_rac_get_prob(c, p[0])) { 
604 
int i;

605  
606 
for (i = 0; i < 3; i++) 
607 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

608 
for (i = 9; i > 3; i) 
609 
x += vp56_rac_get_prob(c, p[9 + i]) << i;

610 
if (!(x & 0xFFF0)  vp56_rac_get_prob(c, p[12])) 
611 
x += 8;

612 
} else

613 
x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]);

614  
615 
return (x && vp56_rac_get_prob(c, p[1])) ? x : x; 
616 
} 
617  
618 
static const uint8_t *get_submv_prob(uint32_t left, uint32_t top) 
619 
{ 
620 
if (left == top)

621 
return vp8_submv_prob[4!!left]; 
622 
if (!top)

623 
return vp8_submv_prob[2]; 
624 
return vp8_submv_prob[1!!left]; 
625 
} 
626  
627 
/**

628 
* Split motion vector prediction, 16.4.

629 
* @returns the number of motion vectors parsed (2, 4 or 16)

630 
*/

631 
static int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, 
632 
VP8Macroblock *mb, VP56mv *base_mv) 
633 
{ 
634 
int part_idx = mb>partitioning =

635 
vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); 
636 
int n, num = vp8_mbsplit_count[part_idx];

637 
VP8Macroblock *top_mb = &mb[2];

638 
VP8Macroblock *left_mb = &mb[1];

639 
const uint8_t *mbsplits_left = vp8_mbsplits[left_mb>partitioning],

640 
*mbsplits_top = vp8_mbsplits[top_mb>partitioning], 
641 
*mbsplits_cur = vp8_mbsplits[part_idx], 
642 
*firstidx = vp8_mbfirstidx[part_idx]; 
643 
VP56mv *top_mv = top_mb>bmv; 
644 
VP56mv *left_mv = left_mb>bmv; 
645 
VP56mv *cur_mv = mb>bmv; 
646  
647 
for (n = 0; n < num; n++) { 
648 
int k = firstidx[n];

649 
uint32_t left, above; 
650 
const uint8_t *submv_prob;

651  
652 
if (!(k & 3)) 
653 
left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);

654 
else

655 
left = AV_RN32A(&cur_mv[mbsplits_cur[k  1]]);

656 
if (k <= 3) 
657 
above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);

658 
else

659 
above = AV_RN32A(&cur_mv[mbsplits_cur[k  4]]);

660  
661 
submv_prob = get_submv_prob(left, above); 
662  
663 
switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) {

664 
case VP8_SUBMVMODE_NEW4X4:

665 
mb>bmv[n].y = base_mv>y + read_mv_component(c, s>prob>mvc[0]);

666 
mb>bmv[n].x = base_mv>x + read_mv_component(c, s>prob>mvc[1]);

667 
break;

668 
case VP8_SUBMVMODE_ZERO4X4:

669 
AV_WN32A(&mb>bmv[n], 0);

670 
break;

671 
case VP8_SUBMVMODE_LEFT4X4:

672 
AV_WN32A(&mb>bmv[n], left); 
673 
break;

674 
case VP8_SUBMVMODE_TOP4X4:

675 
AV_WN32A(&mb>bmv[n], above); 
676 
break;

677 
} 
678 
} 
679  
680 
return num;

681 
} 
682  
683 
/**
 * Read the 16 intra 4x4 prediction modes of a macroblock.
 *
 * On keyframes the modes are stored as a 4x4 grid with the given stride
 * and each one is coded with a context formed by the modes above and to
 * the left of it (so intra4x4[-stride] and intra4x4[-1] must be valid).
 * Otherwise the 16 modes are stored contiguously and use fixed
 * probabilities; stride is not used.
 */
static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4,
                                         int stride, int keyframe)
{
    int x, y, t, l, i;

    if (keyframe) {
        const uint8_t *ctx;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                t = intra4x4[x - stride];
                l = intra4x4[x - 1];
                ctx = vp8_pred4x4_prob_intra[t][l];
                intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
            }
            intra4x4 += stride;
        }
    } else {
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}
704  
705 
/**
 * Read the mode information of one macroblock from the header partition:
 * segment id, skip flag, prediction mode, reference frame and motion vectors.
 *
 * @param intra4x4 destination for the intra 4x4 modes of this macroblock
 * @param segment  in/out segment id; only rewritten when the map is updated
 */
static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                           uint8_t *intra4x4, uint8_t *segment)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    s->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1);
        } else
            // record the equivalent 4x4 mode as context for neighbouring macroblocks
            fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob(c, s->prob->intra)) {
        VP56mv near[2], best;
        uint8_t cnt[4] = { 0 };
        uint8_t p[4];

        // inter MB, 16.2
        if (vp56_rac_get_prob(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt);
        p[0] = vp8_mode_contexts[cnt[0]][0];
        p[1] = vp8_mode_contexts[cnt[1]][1];
        p[2] = vp8_mode_contexts[cnt[2]][2];
        p[3] = vp8_mode_contexts[cnt[3]][3];
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p);
        switch (mb->mode) {
        case VP8_MVMODE_SPLIT:
            // the macroblock vector is the last sub-block vector decoded
            mb->mv = mb->bmv[decode_splitmvs(s, c, mb, &best) - 1];
            break;
        case VP8_MVMODE_ZERO:
            AV_WN32A(&mb->mv, 0);
            break;
        case VP8_MVMODE_NEAREST:
            clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y);
            break;
        case VP8_MVMODE_NEAR:
            clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y);
            break;
        case VP8_MVMODE_NEW:
            mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]);
            mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]);
            break;
        }
        if (mb->mode != VP8_MVMODE_SPLIT) {
            mb->partitioning = VP8_SPLITMVMODE_NONE;
            mb->bmv[0] = mb->mv;
        }
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(c, intra4x4, 4, 0);

        s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame = VP56_FRAME_CURRENT;
        mb->partitioning = VP8_SPLITMVMODE_NONE;
        AV_WN32A(&mb->bmv[0], 0);
    }
}
781  
782 
/**

783 
* @param c arithmetic bitstream reader context

784 
* @param block destination for block coefficients

785 
* @param probs probabilities to use when reading trees from the bitstream

786 
* @param i initial coeff index, 0 unless a separate DC block is coded

787 
* @param zero_nhood the initial prediction context for number of surrounding

788 
* allzero blocks (only left/top, so 02)

789 
* @param qmul array holding the dc/ac dequant factor at position 0/1

790 
* @return 0 if no coeffs were decoded

791 
* otherwise, the index of the last coeff decoded plus one

792 
*/

793 
static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 
794 
uint8_t probs[8][3][NUM_DCT_TOKENS1], 
795 
int i, int zero_nhood, int16_t qmul[2]) 
796 
{ 
797 
int token, nonzero = 0; 
798 
int offset = 0; 
799  
800 
for (; i < 16; i++) { 
801 
token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); 
802  
803 
if (token == DCT_EOB)

804 
break;

805 
else if (token >= DCT_CAT1) { 
806 
int cat = tokenDCT_CAT1;

807 
token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); 
808 
token += 3 + (2<<cat); 
809 
} 
810  
811 
// after the first token, the nonzero prediction context becomes

812 
// based on the last decoded coeff

813 
if (!token) {

814 
zero_nhood = 0;

815 
offset = 1;

816 
continue;

817 
} else if (token == 1) 
818 
zero_nhood = 1;

819 
else

820 
zero_nhood = 2;

821  
822 
// todo: full [16] qmat? load into register?

823 
block[zigzag_scan[i]] = (vp8_rac_get(c) ? token : token) * qmul[!!i]; 
824 
nonzero = i+1;

825 
offset = 0;

826 
} 
827 
return nonzero;

828 
} 
829  
830 
/**
 * Decode all coefficient blocks of one macroblock into s->block and
 * record the per-block counts in s->non_zero_count_cache.
 *
 * @param c     coefficient partition to read from
 * @param t_nnz non-zero flags of the row above, updated in place
 * @param l_nnz non-zero flags of the column to the left, updated in place
 */
static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                             uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = s->segment;

    // every mode except i4x4 and split mv codes the luma DC values separately
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        AV_ZERO128(dc);
        AV_ZERO128(dc+8);
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        nnz_total += nnz;
        s->vp8dsp.vp8_luma_dc_wht(s->block, dc);
        // the luma blocks then start at coeff 1 and use token context 0
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+luma_start may be one more than the actual last index, but we don't care
            s->non_zero_count_cache[y][x] = nnz + luma_start;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                s->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}
885  
886 
static av_always_inline

887 
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

888 
int linesize, int uvlinesize, int simple) 
889 
{ 
890 
AV_COPY128(top_border, src_y + 15*linesize);

891 
if (!simple) {

892 
AV_COPY64(top_border+16, src_cb + 7*uvlinesize); 
893 
AV_COPY64(top_border+24, src_cr + 7*uvlinesize); 
894 
} 
895 
} 
896  
897 
static av_always_inline

898 
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,

899 
int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, 
900 
int simple, int xchg) 
901 
{ 
902 
uint8_t *top_border_m1 = top_border32; // for TL prediction 
903 
src_y = linesize; 
904 
src_cb = uvlinesize; 
905 
src_cr = uvlinesize; 
906  
907 
#define XCHG(a,b,xchg) do { \ 
908 
if (xchg) AV_SWAP64(b,a); \

909 
else AV_COPY64(b,a); \

910 
} while (0) 
911  
912 
XCHG(top_border_m1+8, src_y8, xchg); 
913 
XCHG(top_border, src_y, xchg); 
914 
XCHG(top_border+8, src_y+8, 1); 
915 
if (mb_x < mb_width1) 
916 
XCHG(top_border+32, src_y+16, 1); 
917  
918 
// only copy chroma for normal loop filter

919 
// or to initialize the top row to 127

920 
if (!simple  !mb_y) {

921 
XCHG(top_border_m1+16, src_cb8, xchg); 
922 
XCHG(top_border_m1+24, src_cr8, xchg); 
923 
XCHG(top_border+16, src_cb, 1); 
924 
XCHG(top_border+24, src_cr, 1); 
925 
} 
926 
} 
927  
928 
static int check_intra_pred_mode(int mode, int mb_x, int mb_y) 
929 
{ 
930 
if (mode == DC_PRED8x8) {

931 
if (!(mb_xmb_y))

932 
mode = DC_128_PRED8x8; 
933 
else if (!mb_y) 
934 
mode = LEFT_DC_PRED8x8; 
935 
else if (!mb_x) 
936 
mode = TOP_DC_PRED8x8; 
937 
} 
938 
return mode;

939 
} 
940  
941 
/**
 * Intra-predict one macroblock; for 4x4 prediction the residual is added
 * block by block since later blocks predict from reconstructed ones.
 *
 * @param dst      pointers to the macroblock in the Y, Cb and Cr planes
 * @param intra4x4 4x4 prediction modes of this macroblock
 */
static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb,
                          uint8_t *intra4x4, int mb_x, int mb_y)
{
    int x, y, mode, nnz, tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (s->deblock_filter || !mb_y)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        mode = check_intra_pred_mode(mb->mode, mb_x, mb_y);
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        // must match the layout decode_intra4x4_modes() wrote the modes in
        int stride = s->keyframe ? s->b4_stride : 4;

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (mb_x == s->mb_width-1) {
            // unsigned multiply: replicating a byte >= 0x80 would overflow a signed int
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(s->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                if (x == 3)
                    topright = tr_right;

                s->hpc.pred4x4[intra4x4[x]](ptr+4*x, topright, s->linesize);

                nnz = s->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4*s->linesize;
            intra4x4 += stride;
        }
    }

    mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y);
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    // undo the border exchange done on entry
    if (s->deblock_filter || !mb_y)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}
1006  
1007 
/**

1008 
* Generic MC function.

1009 
*

1010 
* @param s VP8 decoding context

1011 
* @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes

1012 
* @param dst target buffer for block data at block position

1013 
* @param src reference picture buffer at origin (0, 0)

1014 
* @param mv motion vector (relative to block position) to get pixel data from

1015 
* @param x_off horizontal position of block from origin (0, 0)

1016 
* @param y_off vertical position of block from origin (0, 0)

1017 
* @param block_w width of block (16, 8 or 4)

1018 
* @param block_h height of block (always same as block_w)

1019 
* @param width width of src/dst plane data

1020 
* @param height height of src/dst plane data

1021 
* @param linesize size of a single line of plane data, including padding

1022 
* @param mc_func motion compensation function pointers (bilinear or sixtap MC)

1023 
*/

1024 
static inline void vp8_mc(VP8Context *s, int luma, 
1025 
uint8_t *dst, uint8_t *src, const VP56mv *mv,

1026 
int x_off, int y_off, int block_w, int block_h, 
1027 
int width, int height, int linesize, 
1028 
vp8_mc_func mc_func[3][3]) 
1029 
{ 
1030 
if (AV_RN32A(mv)) {

1031 
static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; 
1032 
int mx = (mv>x << luma)&7, mx_idx = idx[mx]; 
1033 
int my = (mv>y << luma)&7, my_idx = idx[my]; 
1034  
1035 
x_off += mv>x >> (3  luma);

1036 
y_off += mv>y >> (3  luma);

1037  
1038 
// edge emulation

1039 
src += y_off * linesize + x_off; 
1040 
if (x_off < 2  x_off >= width  block_w  3  
1041 
y_off < 2  y_off >= height  block_h  3) { 
1042 
ff_emulated_edge_mc(s>edge_emu_buffer, src  2 * linesize  2, linesize, 
1043 
block_w + 5, block_h + 5, 
1044 
x_off  2, y_off  2, width, height); 
1045 
src = s>edge_emu_buffer + 2 + linesize * 2; 
1046 
} 
1047 
mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my); 
1048 
} else

1049 
mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0); 
1050 
} 
1051  
1052 
static inline void vp8_mc_part(VP8Context *s, uint8_t *dst[3], 
1053 
AVFrame *ref_frame, int x_off, int y_off, 
1054 
int bx_off, int by_off, 
1055 
int block_w, int block_h, 
1056 
int width, int height, VP56mv *mv) 
1057 
{ 
1058 
VP56mv uvmv = *mv; 
1059  
1060 
/* Y */

1061 
vp8_mc(s, 1, dst[0] + by_off * s>linesize + bx_off, 
1062 
ref_frame>data[0], mv, x_off + bx_off, y_off + by_off,

1063 
block_w, block_h, width, height, s>linesize, 
1064 
s>put_pixels_tab[block_w == 8]);

1065  
1066 
/* U/V */

1067 
if (s>profile == 3) { 
1068 
uvmv.x &= ~7;

1069 
uvmv.y &= ~7;

1070 
} 
1071 
x_off >>= 1; y_off >>= 1; 
1072 
bx_off >>= 1; by_off >>= 1; 
1073 
width >>= 1; height >>= 1; 
1074 
block_w >>= 1; block_h >>= 1; 
1075 
vp8_mc(s, 0, dst[1] + by_off * s>uvlinesize + bx_off, 
1076 
ref_frame>data[1], &uvmv, x_off + bx_off, y_off + by_off,

1077 
block_w, block_h, width, height, s>uvlinesize, 
1078 
s>put_pixels_tab[1 + (block_w == 4)]); 
1079 
vp8_mc(s, 0, dst[2] + by_off * s>uvlinesize + bx_off, 
1080 
ref_frame>data[2], &uvmv, x_off + bx_off, y_off + by_off,

1081 
block_w, block_h, width, height, s>uvlinesize, 
1082 
s>put_pixels_tab[1 + (block_w == 4)]); 
1083 
} 
1084  
1085 
/* Fetch pixels for estimated mv 4 macroblocks ahead.

1086 
* Optimized for 64byte cache lines. Inspired by ffh264 prefetch_motion. */

1087 
static inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref) 
1088 
{ 
1089 
/* Don't prefetch refs that haven't been used very often this frame. */

1090 
if (s>ref_count[ref1] > (mb_xy >> 5)) { 
1091 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1092 
int mx = mb>mv.x + x_off + 8; 
1093 
int my = mb>mv.y + y_off;

1094 
uint8_t **src= s>framep[ref]>data; 
1095 
int off= mx + (my + (mb_x&3)*4)*s>linesize + 64; 
1096 
s>dsp.prefetch(src[0]+off, s>linesize, 4); 
1097 
off= (mx>>1) + ((my>>1) + (mb_x&7))*s>uvlinesize + 64; 
1098 
s>dsp.prefetch(src[1]+off, src[2]src[1], 2); 
1099 
} 
1100 
} 
1101  
1102 
/**

1103 
* Apply motion vectors to prediction buffer, chapter 18.

1104 
*/

1105 
static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, 
1106 
int mb_x, int mb_y) 
1107 
{ 
1108 
int x_off = mb_x << 4, y_off = mb_y << 4; 
1109 
int width = 16*s>mb_width, height = 16*s>mb_height; 
1110 
AVFrame *ref = s>framep[mb>ref_frame]; 
1111 
VP56mv *bmv = mb>bmv; 
1112  
1113 
if (mb>mode < VP8_MVMODE_SPLIT) {

1114 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1115 
0, 0, 16, 16, width, height, &mb>mv); 
1116 
} else switch (mb>partitioning) { 
1117 
case VP8_SPLITMVMODE_4x4: {

1118 
int x, y;

1119 
VP56mv uvmv; 
1120  
1121 
/* Y */

1122 
for (y = 0; y < 4; y++) { 
1123 
for (x = 0; x < 4; x++) { 
1124 
vp8_mc(s, 1, dst[0] + 4*y*s>linesize + x*4, 
1125 
ref>data[0], &bmv[4*y + x], 
1126 
4*x + x_off, 4*y + y_off, 4, 4, 
1127 
width, height, s>linesize, 
1128 
s>put_pixels_tab[2]);

1129 
} 
1130 
} 
1131  
1132 
/* U/V */

1133 
x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; 
1134 
for (y = 0; y < 2; y++) { 
1135 
for (x = 0; x < 2; x++) { 
1136 
uvmv.x = mb>bmv[ 2*y * 4 + 2*x ].x + 
1137 
mb>bmv[ 2*y * 4 + 2*x+1].x + 
1138 
mb>bmv[(2*y+1) * 4 + 2*x ].x + 
1139 
mb>bmv[(2*y+1) * 4 + 2*x+1].x; 
1140 
uvmv.y = mb>bmv[ 2*y * 4 + 2*x ].y + 
1141 
mb>bmv[ 2*y * 4 + 2*x+1].y + 
1142 
mb>bmv[(2*y+1) * 4 + 2*x ].y + 
1143 
mb>bmv[(2*y+1) * 4 + 2*x+1].y; 
1144 
uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT1))) >> 2; 
1145 
uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT1))) >> 2; 
1146 
if (s>profile == 3) { 
1147 
uvmv.x &= ~7;

1148 
uvmv.y &= ~7;

1149 
} 
1150 
vp8_mc(s, 0, dst[1] + 4*y*s>uvlinesize + x*4, 
1151 
ref>data[1], &uvmv,

1152 
4*x + x_off, 4*y + y_off, 4, 4, 
1153 
width, height, s>uvlinesize, 
1154 
s>put_pixels_tab[2]);

1155 
vp8_mc(s, 0, dst[2] + 4*y*s>uvlinesize + x*4, 
1156 
ref>data[2], &uvmv,

1157 
4*x + x_off, 4*y + y_off, 4, 4, 
1158 
width, height, s>uvlinesize, 
1159 
s>put_pixels_tab[2]);

1160 
} 
1161 
} 
1162 
break;

1163 
} 
1164 
case VP8_SPLITMVMODE_16x8:

1165 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1166 
0, 0, 16, 8, width, height, &bmv[0]); 
1167 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1168 
0, 8, 16, 8, width, height, &bmv[1]); 
1169 
break;

1170 
case VP8_SPLITMVMODE_8x16:

1171 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1172 
0, 0, 8, 16, width, height, &bmv[0]); 
1173 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1174 
8, 0, 8, 16, width, height, &bmv[1]); 
1175 
break;

1176 
case VP8_SPLITMVMODE_8x8:

1177 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1178 
0, 0, 8, 8, width, height, &bmv[0]); 
1179 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1180 
8, 0, 8, 8, width, height, &bmv[1]); 
1181 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1182 
0, 8, 8, 8, width, height, &bmv[2]); 
1183 
vp8_mc_part(s, dst, ref, x_off, y_off, 
1184 
8, 8, 8, 8, width, height, &bmv[3]); 
1185 
break;

1186 
} 
1187 
} 
1188  
1189 
/**
 * Add the inverse-transformed residual of one macroblock to dst.
 * Luma is skipped for MODE_I4x4 macroblocks; chroma is always processed.
 *
 * @param s   VP8 decoding context (non_zero_count_cache and block are read)
 * @param dst Y/Cb/Cr pointers at the macroblock's top-left pixel
 * @param mb  macroblock whose mode selects whether luma is processed
 */
static void idct_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            // the four per-block counts of this row read as one 32-bit word
            uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // at least one block has a count above 1
                    for (x = 0; x < 4; x++) {
                        int nnz = s->non_zero_count_cache[y][x];
                        if (nnz) {
                            if (nnz == 1)
                                s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize);
                            else
                                s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize);
                        }
                    }
                } else {
                    // every count in the row is 0 or 1: one combined DC call
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, s->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RN32A(s->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        int nnz = s->non_zero_count_cache[4+ch][(y<<1)+x];
                        if (nnz) {
                            if (nnz == 1)
                                s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                            else
                                s->vp8dsp.vp8_idct_add(ch_dst+4*x, s->block[4+ch][(y<<1)+x], s->uvlinesize);
                        }
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, s->block[4+ch], s->uvlinesize);
            }
        }
    }
}
1239  
1240 
/**
 * Compute the loop filter parameters for one macroblock and store them in f.
 *
 * @param s  VP8 decoding context (segmentation, filter and lf_delta are read)
 * @param mb macroblock whose ref_frame, mode and skip flag are read
 * @param f  output: filter_level, inner_limit and inner_filter are written
 */
static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[s->segment];
        // non-absolute segment values are added to the frame level
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];

        if (mb->ref_frame == VP56_FRAME_CURRENT) {
            if (mb->mode == MODE_I4x4)
                filter_level += s->lf_delta.mode[0];
        } else {
            if (mb->mode == VP8_MVMODE_ZERO)
                filter_level += s->lf_delta.mode[1];
            else if (mb->mode == VP8_MVMODE_SPLIT)
                filter_level += s->lf_delta.mode[3];
            else
                filter_level += s->lf_delta.mode[2];
        }
    }
    filter_level = av_clip(filter_level, 0, 63);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= s->filter.sharpness > 4 ? 2 : 1;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    // inner edges are filtered unless the macroblock is skipped and is
    // neither I4x4 nor split-mv
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}
1279  
1280 
/**
 * Run the normal (non-simple) loop filter on one macroblock, luma and chroma.
 * Does nothing when f->filter_level is zero.
 *
 * @param s    VP8 decoding context
 * @param dst  Y/Cb/Cr pointers at the macroblock's top-left pixel
 * @param f    filter parameters for this macroblock
 * @param mb_x macroblock column; the left macroblock edge is filtered only if nonzero
 * @param mb_y macroblock row; the top macroblock edge is filtered only if nonzero
 */
static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;

    if (!filter_level)
        return;

    mbedge_lim = 2*(filter_level+2) + inner_limit;
    bedge_lim  = 2* filter_level    + inner_limit;
    hev_thresh = filter_level >= 15;

    // hev_thresh steps up with filter_level; the steps differ for keyframes
    if (s->keyframe) {
        if (filter_level >= 40)
            hev_thresh = 2;
    } else {
        if (filter_level >= 40)
            hev_thresh = 3;
        else if (filter_level >= 20)
            hev_thresh = 2;
    }

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}
1348  
1349 
/**
 * Run the simple loop filter on the luma plane of one macroblock.
 * Does nothing when f->filter_level is zero.
 *
 * @param s    VP8 decoding context
 * @param dst  luma pointer at the macroblock's top-left pixel
 * @param f    filter parameters for this macroblock
 * @param mb_x macroblock column; the left macroblock edge is filtered only if nonzero
 * @param mb_y macroblock row; the top macroblock edge is filtered only if nonzero
 */
static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    if (!filter_level)
        return;

    mbedge_lim = 2*(filter_level+2) + inner_limit;
    bedge_lim  = 2* filter_level    + inner_limit;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}
1379  
1380 
/**
 * Loop-filter one macroblock row of the current frame with the normal filter.
 * backup_mb_border is called for each macroblock before it is filtered.
 *
 * @param s    VP8 decoding context; s->filter_strength holds one entry per column
 * @param mb_y macroblock row to filter
 */
static void filter_mb_row(VP8Context *s, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst[3] = {
        s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
        s->framep[VP56_FRAME_CURRENT]->data[1] +  8*mb_y*s->uvlinesize,
        s->framep[VP56_FRAME_CURRENT]->data[2] +  8*mb_y*s->uvlinesize
    };
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        filter_mb(s, dst, f++, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
    }
}
1398  
1399 
/**
 * Loop-filter one macroblock row of the current frame with the simple filter
 * (luma plane only). backup_mb_border is called for each macroblock, with
 * NULL chroma pointers, before it is filtered.
 *
 * @param s    VP8 decoding context; s->filter_strength holds one entry per column
 * @param mb_y macroblock row to filter
 */
static void filter_mb_row_simple(VP8Context *s, int mb_y)
{
    VP8FilterStrength *f = s->filter_strength;
    uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
    int mb_x;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
        filter_mb_simple(s, dst, f++, mb_x, mb_y);
        dst += 16;
    }
}
1411  
1412 
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, 
1413 
AVPacket *avpkt) 
1414 
{ 
1415 
VP8Context *s = avctx>priv_data; 
1416 
int ret, mb_x, mb_y, i, y, referenced;

1417 
enum AVDiscard skip_thresh;

1418 
AVFrame *curframe = NULL;

1419  
1420 
if ((ret = decode_frame_header(s, avpkt>data, avpkt>size)) < 0) 
1421 
return ret;

1422  
1423 
referenced = s>update_last  s>update_golden == VP56_FRAME_CURRENT 
1424 
 s>update_altref == VP56_FRAME_CURRENT; 
1425  
1426 
skip_thresh = !referenced ? AVDISCARD_NONREF : 
1427 
!s>keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL; 
1428  
1429 
if (avctx>skip_frame >= skip_thresh) {

1430 
s>invisible = 1;

1431 
goto skip_decode;

1432 
} 
1433 
s>deblock_filter = s>filter.level && avctx>skip_loop_filter < skip_thresh; 
1434  
1435 
for (i = 0; i < 4; i++) 
1436 
if (&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] &&

1437 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1438 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) { 
1439 
curframe = s>framep[VP56_FRAME_CURRENT] = &s>frames[i]; 
1440 
break;

1441 
} 
1442 
if (curframe>data[0]) 
1443 
avctx>release_buffer(avctx, curframe); 
1444  
1445 
curframe>key_frame = s>keyframe; 
1446 
curframe>pict_type = s>keyframe ? FF_I_TYPE : FF_P_TYPE; 
1447 
curframe>reference = referenced ? 3 : 0; 
1448 
if ((ret = avctx>get_buffer(avctx, curframe))) {

1449 
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");

1450 
return ret;

1451 
} 
1452  
1453 
// Given that arithmetic probabilities are updated every frame, it's quite likely

1454 
// that the values we have on a random interframe are complete junk if we didn't

1455 
// start decode on a keyframe. So just don't display anything rather than junk.

1456 
if (!s>keyframe && (!s>framep[VP56_FRAME_PREVIOUS] 

1457 
!s>framep[VP56_FRAME_GOLDEN]  
1458 
!s>framep[VP56_FRAME_GOLDEN2])) { 
1459 
av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");

1460 
return AVERROR_INVALIDDATA;

1461 
} 
1462  
1463 
s>linesize = curframe>linesize[0];

1464 
s>uvlinesize = curframe>linesize[1];

1465  
1466 
if (!s>edge_emu_buffer)

1467 
s>edge_emu_buffer = av_malloc(21*s>linesize);

1468  
1469 
memset(s>top_nnz, 0, s>mb_width*sizeof(*s>top_nnz)); 
1470  
1471 
/* Zero macroblock structures for top/left prediction from outside the frame. */

1472 
memset(s>macroblocks, 0, (s>mb_width + s>mb_height*2)*sizeof(*s>macroblocks)); 
1473  
1474 
// top edge of 127 for intra prediction

1475 
memset(s>top_border, 127, (s>mb_width+1)*sizeof(*s>top_border)); 
1476 
memset(s>ref_count, 0, sizeof(s>ref_count)); 
1477  
1478 
for (mb_y = 0; mb_y < s>mb_height; mb_y++) { 
1479 
VP56RangeCoder *c = &s>coeff_partition[mb_y & (s>num_coeff_partitions1)];

1480 
VP8Macroblock *mb = s>macroblocks + (s>mb_height  mb_y  1)*2; 
1481 
uint8_t *intra4x4 = s>intra4x4_pred_mode + 4*mb_y*s>b4_stride;

1482 
uint8_t *segment_map = s>segmentation_map + mb_y*s>mb_stride; 
1483 
int mb_xy = mb_y * s>mb_stride;

1484 
uint8_t *dst[3] = {

1485 
curframe>data[0] + 16*mb_y*s>linesize, 
1486 
curframe>data[1] + 8*mb_y*s>uvlinesize, 
1487 
curframe>data[2] + 8*mb_y*s>uvlinesize 
1488 
}; 
1489  
1490 
memset(s>left_nnz, 0, sizeof(s>left_nnz)); 
1491  
1492 
// left edge of 129 for intra prediction

1493 
if (!(avctx>flags & CODEC_FLAG_EMU_EDGE))

1494 
for (i = 0; i < 3; i++) 
1495 
for (y = 0; y < 16>>!!i; y++) 
1496 
dst[i][y*curframe>linesize[i]1] = 129; 
1497 
if (mb_y)

1498 
memset(s>top_border, 129, sizeof(*s>top_border)); 
1499  
1500 
for (mb_x = 0; mb_x < s>mb_width; mb_x++, mb_xy++, mb++) { 
1501 
uint8_t *intra4x4_mb = s>keyframe ? intra4x4 + 4*mb_x : s>intra4x4_pred_mode_mb;

1502 
uint8_t *segment_mb = segment_map+mb_x; 
1503  
1504 
/* Prefetch the current frame, 4 MBs ahead */

1505 
s>dsp.prefetch(dst[0] + (mb_x&3)*4*s>linesize + 64, s>linesize, 4); 
1506 
s>dsp.prefetch(dst[1] + (mb_x&7)*s>uvlinesize + 64, dst[2]  dst[1], 2); 
1507  
1508 
decode_mb_mode(s, mb, mb_x, mb_y, intra4x4_mb, segment_mb); 
1509  
1510 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS); 
1511  
1512 
if (!mb>skip)

1513 
decode_mb_coeffs(s, c, mb, s>top_nnz[mb_x], s>left_nnz); 
1514  
1515 
if (mb>mode <= MODE_I4x4)

1516 
intra_predict(s, dst, mb, intra4x4_mb, mb_x, mb_y); 
1517 
else

1518 
inter_predict(s, dst, mb, mb_x, mb_y); 
1519  
1520 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN); 
1521  
1522 
if (!mb>skip) {

1523 
idct_mb(s, dst, mb); 
1524 
} else {

1525 
AV_ZERO64(s>left_nnz); 
1526 
AV_WN64(s>top_nnz[mb_x], 0); // array of 9, so unaligned 
1527  
1528 
// Reset DC block predictors if they would exist if the mb had coefficients

1529 
if (mb>mode != MODE_I4x4 && mb>mode != VP8_MVMODE_SPLIT) {

1530 
s>left_nnz[8] = 0; 
1531 
s>top_nnz[mb_x][8] = 0; 
1532 
} 
1533 
} 
1534  
1535 
if (s>deblock_filter)

1536 
filter_level_for_mb(s, mb, &s>filter_strength[mb_x]); 
1537  
1538 
prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2); 
1539  
1540 
dst[0] += 16; 
1541 
dst[1] += 8; 
1542 
dst[2] += 8; 
1543 
} 
1544 
if (s>deblock_filter) {

1545 
if (s>filter.simple)

1546 
filter_mb_row_simple(s, mb_y); 
1547 
else

1548 
filter_mb_row(s, mb_y); 
1549 
} 
1550 
} 
1551  
1552 
skip_decode:

1553 
// if future frames don't use the updated probabilities,

1554 
// reset them to the values we saved

1555 
if (!s>update_probabilities)

1556 
s>prob[0] = s>prob[1]; 
1557  
1558 
// check if golden and altref are swapped

1559 
if (s>update_altref == VP56_FRAME_GOLDEN &&

1560 
s>update_golden == VP56_FRAME_GOLDEN2) 
1561 
FFSWAP(AVFrame *, s>framep[VP56_FRAME_GOLDEN], s>framep[VP56_FRAME_GOLDEN2]); 
1562 
else {

1563 
if (s>update_altref != VP56_FRAME_NONE)

1564 
s>framep[VP56_FRAME_GOLDEN2] = s>framep[s>update_altref]; 
1565  
1566 
if (s>update_golden != VP56_FRAME_NONE)

1567 
s>framep[VP56_FRAME_GOLDEN] = s>framep[s>update_golden]; 
1568 
} 
1569  
1570 
if (s>update_last) // move cur>prev 
1571 
s>framep[VP56_FRAME_PREVIOUS] = s>framep[VP56_FRAME_CURRENT]; 
1572  
1573 
// release no longer referenced frames

1574 
for (i = 0; i < 4; i++) 
1575 
if (s>frames[i].data[0] && 
1576 
&s>frames[i] != s>framep[VP56_FRAME_CURRENT] && 
1577 
&s>frames[i] != s>framep[VP56_FRAME_PREVIOUS] && 
1578 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN] && 
1579 
&s>frames[i] != s>framep[VP56_FRAME_GOLDEN2]) 
1580 
avctx>release_buffer(avctx, &s>frames[i]); 
1581  
1582 
if (!s>invisible) {

1583 
*(AVFrame*)data = *s>framep[VP56_FRAME_CURRENT]; 
1584 
*data_size = sizeof(AVFrame);

1585 
} 
1586  
1587 
return avpkt>size;

1588 
} 
1589  
1590 
/**
 * Initialize the decoder: set the output pixel format and set up the DSP,
 * intra-prediction and VP8 DSP function tables.
 *
 * @param avctx codec context; avctx->priv_data is the VP8Context
 * @return 0 on success, AVERROR_PATCHWELCOME if CODEC_FLAG_EMU_EDGE is set
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, CODEC_ID_VP8);
    ff_vp8dsp_init(&s->vp8dsp);

    // intra pred needs edge emulation among other things
    if (avctx->flags&CODEC_FLAG_EMU_EDGE) {
        av_log(avctx, AV_LOG_ERROR, "Edge emulation not supported\n");
        return AVERROR_PATCHWELCOME;
    }

    return 0;
}
1609  
1610 
/**
 * Close the decoder. All cleanup is delegated to vp8_decode_flush().
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush(avctx);
    return 0;
}
1615  
1616 
AVCodec vp8_decoder = { 
1617 
"vp8",

1618 
AVMEDIA_TYPE_VIDEO, 
1619 
CODEC_ID_VP8, 
1620 
sizeof(VP8Context),

1621 
vp8_decode_init, 
1622 
NULL,

1623 
vp8_decode_free, 
1624 
vp8_decode_frame, 
1625 
CODEC_CAP_DR1, 
1626 
.flush = vp8_decode_flush, 
1627 
.long_name = NULL_IF_CONFIG_SMALL("On2 VP8"),

1628 
}; 