Revision dc172ecc libavcodec/h264.c
libavcodec/h264.c | ||
---|---|---|
459 | 459 |
const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8; |
460 | 460 |
int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8; |
461 | 461 |
const int luma_xy= (mx&3) + ((my&3)<<2); |
462 |
uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize; |
|
462 |
uint8_t * src_y = pic->data[0] + (mx>>2)*h->pixel_size + (my>>2)*h->mb_linesize;
|
|
463 | 463 |
uint8_t * src_cb, * src_cr; |
464 | 464 |
int extra_width= h->emu_edge_width; |
465 | 465 |
int extra_height= h->emu_edge_height; |
... | ... | |
476 | 476 |
|| full_my < 0-extra_height |
477 | 477 |
|| full_mx + 16/*FIXME*/ > pic_width + extra_width |
478 | 478 |
|| full_my + 16/*FIXME*/ > pic_height + extra_height){ |
479 |
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height); |
|
480 |
src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize; |
|
479 |
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2*h->pixel_size - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
|
|
480 |
src_y= s->edge_emu_buffer + 2*h->pixel_size + 2*h->mb_linesize;
|
|
481 | 481 |
emu=1; |
482 | 482 |
} |
483 | 483 |
|
... | ... | |
493 | 493 |
my += 2 * ((s->mb_y & 1) - (pic->reference - 1)); |
494 | 494 |
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1); |
495 | 495 |
} |
496 |
src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize; |
|
497 |
src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize; |
|
496 |
src_cb= pic->data[1] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
|
|
497 |
src_cr= pic->data[2] + (mx>>3)*h->pixel_size + (my>>3)*h->mb_uvlinesize;
|
|
498 | 498 |
|
499 | 499 |
if(emu){ |
500 | 500 |
s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1); |
... | ... | |
519 | 519 |
qpel_mc_func *qpix_op= qpix_put; |
520 | 520 |
h264_chroma_mc_func chroma_op= chroma_put; |
521 | 521 |
|
522 |
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
|
523 |
dest_cb += x_offset + y_offset*h->mb_uvlinesize; |
|
524 |
dest_cr += x_offset + y_offset*h->mb_uvlinesize; |
|
522 |
dest_y += 2*x_offset*h->pixel_size + 2*y_offset*h-> mb_linesize;
|
|
523 |
dest_cb += x_offset*h->pixel_size + y_offset*h->mb_uvlinesize;
|
|
524 |
dest_cr += x_offset*h->pixel_size + y_offset*h->mb_uvlinesize;
|
|
525 | 525 |
x_offset += 8*s->mb_x; |
526 | 526 |
y_offset += 8*(s->mb_y >> MB_FIELD); |
527 | 527 |
|
... | ... | |
552 | 552 |
int list0, int list1){ |
553 | 553 |
MpegEncContext * const s = &h->s; |
554 | 554 |
|
555 |
dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize; |
|
556 |
dest_cb += x_offset + y_offset*h->mb_uvlinesize; |
|
557 |
dest_cr += x_offset + y_offset*h->mb_uvlinesize; |
|
555 |
dest_y += 2*x_offset*h->pixel_size + 2*y_offset*h-> mb_linesize;
|
|
556 |
dest_cb += x_offset*h->pixel_size + y_offset*h->mb_uvlinesize;
|
|
557 |
dest_cr += x_offset*h->pixel_size + y_offset*h->mb_uvlinesize;
|
|
558 | 558 |
x_offset += 8*s->mb_x; |
559 | 559 |
y_offset += 8*(s->mb_y >> MB_FIELD); |
560 | 560 |
|
... | ... | |
562 | 562 |
/* don't optimize for luma-only case, since B-frames usually |
563 | 563 |
* use implicit weights => chroma too. */ |
564 | 564 |
uint8_t *tmp_cb = s->obmc_scratchpad; |
565 |
uint8_t *tmp_cr = s->obmc_scratchpad + 8; |
|
565 |
uint8_t *tmp_cr = s->obmc_scratchpad + 8*h->pixel_size;
|
|
566 | 566 |
uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize; |
567 | 567 |
int refn0 = h->ref_cache[0][ scan8[n] ]; |
568 | 568 |
int refn1 = h->ref_cache[1][ scan8[n] ]; |
... | ... | |
637 | 637 |
const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; |
638 | 638 |
const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; |
639 | 639 |
uint8_t **src= h->ref_list[list][refn].data; |
640 |
int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
|
|
640 |
int off= mx*h->pixel_size + (my + (s->mb_x&3)*4)*h->mb_linesize + 64*h->pixel_size;
|
|
641 | 641 |
s->dsp.prefetch(src[0]+off, s->linesize, 4); |
642 |
off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
|
|
642 |
off= (mx>>1)*h->pixel_size + ((my>>1)*h->pixel_size + (s->mb_x&7))*s->uvlinesize + 64*h->pixel_size;
|
|
643 | 643 |
s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); |
644 | 644 |
} |
645 | 645 |
} |
... | ... | |
664 | 664 |
weight_op, weight_avg, |
665 | 665 |
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
666 | 666 |
}else if(IS_16X8(mb_type)){ |
667 |
mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0, |
|
667 |
mc_part(h, 0, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 0,
|
|
668 | 668 |
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], |
669 | 669 |
&weight_op[1], &weight_avg[1], |
670 | 670 |
IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1)); |
671 |
mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4, |
|
671 |
mc_part(h, 8, 0, 4, 8*h->pixel_size, dest_y, dest_cb, dest_cr, 0, 4,
|
|
672 | 672 |
qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0], |
673 | 673 |
&weight_op[1], &weight_avg[1], |
674 | 674 |
IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1)); |
... | ... | |
698 | 698 |
&weight_op[3], &weight_avg[3], |
699 | 699 |
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
700 | 700 |
}else if(IS_SUB_8X4(sub_mb_type)){ |
701 |
mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset, |
|
701 |
mc_part(h, n , 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset,
|
|
702 | 702 |
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], |
703 | 703 |
&weight_op[4], &weight_avg[4], |
704 | 704 |
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
705 |
mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2, |
|
705 |
mc_part(h, n+2, 0, 2, 4*h->pixel_size, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
|
|
706 | 706 |
qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1], |
707 | 707 |
&weight_op[4], &weight_avg[4], |
708 | 708 |
IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1)); |
... | ... | |
900 | 900 |
* Allocate buffers which are not shared amongst multiple threads. |
901 | 901 |
*/ |
902 | 902 |
static int context_init(H264Context *h){ |
903 |
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) |
|
904 |
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail) |
|
903 |
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
|
|
904 |
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t)*2, fail)
|
|
905 | 905 |
|
906 | 906 |
h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] = |
907 | 907 |
h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE; |
... | ... | |
1003 | 1003 |
|
1004 | 1004 |
ff_h264_decode_init_vlc(); |
1005 | 1005 |
|
1006 |
h->pixel_size = 1; |
|
1007 |
|
|
1006 | 1008 |
h->thread_context[0] = h; |
1007 | 1009 |
h->outputed_poc = h->next_outputed_poc = INT_MIN; |
1008 | 1010 |
h->prev_poc_msb= 1<<16; |
... | ... | |
1165 | 1167 |
assert(s->linesize && s->uvlinesize); |
1166 | 1168 |
|
1167 | 1169 |
for(i=0; i<16; i++){ |
1168 |
h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3); |
|
1169 |
h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3); |
|
1170 |
h->block_offset[i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->linesize*((scan8[i] - scan8[0])>>3);
|
|
1171 |
h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->linesize*((scan8[i] - scan8[0])>>3);
|
|
1170 | 1172 |
} |
1171 | 1173 |
for(i=0; i<4; i++){ |
1172 | 1174 |
h->block_offset[16+i]= |
1173 |
h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3); |
|
1175 |
h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
|
|
1174 | 1176 |
h->block_offset[24+16+i]= |
1175 |
h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3); |
|
1177 |
h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7)*h->pixel_size + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
|
|
1176 | 1178 |
} |
1177 | 1179 |
|
1178 | 1180 |
/* can't be in alloc_tables because linesize isn't known there. |
... | ... | |
1372 | 1374 |
if(!MB_MBAFF){ |
1373 | 1375 |
top_border = h->top_borders[0][s->mb_x]; |
1374 | 1376 |
AV_COPY128(top_border, src_y + 15*linesize); |
1377 |
if (h->pixel_size == 2) |
|
1378 |
AV_COPY128(top_border+16, src_y+15*linesize+16); |
|
1375 | 1379 |
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
1380 |
if (h->pixel_size == 2) { |
|
1381 |
AV_COPY128(top_border+32, src_cb+7*uvlinesize); |
|
1382 |
AV_COPY128(top_border+48, src_cr+7*uvlinesize); |
|
1383 |
} else { |
|
1376 | 1384 |
AV_COPY64(top_border+16, src_cb+7*uvlinesize); |
1377 | 1385 |
AV_COPY64(top_border+24, src_cr+7*uvlinesize); |
1386 |
} |
|
1378 | 1387 |
} |
1379 | 1388 |
} |
1380 | 1389 |
}else if(MB_MBAFF){ |
... | ... | |
1387 | 1396 |
// There are two lines saved, the line above the top macroblock of a pair, |
1388 | 1397 |
// and the line above the bottom macroblock |
1389 | 1398 |
AV_COPY128(top_border, src_y + 16*linesize); |
1399 |
if (h->pixel_size == 2) |
|
1400 |
AV_COPY128(top_border+16, src_y+16*linesize+16); |
|
1390 | 1401 |
|
1391 | 1402 |
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
1403 |
if (h->pixel_size == 2) { |
|
1404 |
AV_COPY128(top_border+32, src_cb+8*uvlinesize); |
|
1405 |
AV_COPY128(top_border+48, src_cr+8*uvlinesize); |
|
1406 |
} else { |
|
1392 | 1407 |
AV_COPY64(top_border+16, src_cb+8*uvlinesize); |
1393 | 1408 |
AV_COPY64(top_border+24, src_cr+8*uvlinesize); |
1409 |
} |
|
1394 | 1410 |
} |
1395 | 1411 |
} |
1396 | 1412 |
|
... | ... | |
1419 | 1435 |
deblock_top = (s->mb_y > !!MB_FIELD); |
1420 | 1436 |
} |
1421 | 1437 |
|
1422 |
src_y -= linesize + 1;
|
|
1423 |
src_cb -= uvlinesize + 1;
|
|
1424 |
src_cr -= uvlinesize + 1;
|
|
1438 |
src_y -= linesize + h->pixel_size;
|
|
1439 |
src_cb -= uvlinesize + h->pixel_size;
|
|
1440 |
src_cr -= uvlinesize + h->pixel_size;
|
|
1425 | 1441 |
|
1426 | 1442 |
top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; |
1427 | 1443 |
top_border = h->top_borders[top_idx][s->mb_x]; |
1428 | 1444 |
|
1429 | 1445 |
#define XCHG(a,b,xchg)\ |
1446 |
if (h->pixel_size == 2) {\ |
|
1447 |
if (xchg) {\ |
|
1448 |
AV_SWAP64(b+0,a+0);\ |
|
1449 |
AV_SWAP64(b+8,a+8);\ |
|
1450 |
} else {\ |
|
1451 |
AV_COPY128(b,a); \ |
|
1452 |
}\ |
|
1453 |
} else \ |
|
1430 | 1454 |
if (xchg) AV_SWAP64(b,a);\ |
1431 | 1455 |
else AV_COPY64(b,a); |
1432 | 1456 |
|
1433 | 1457 |
if(deblock_top){ |
1434 | 1458 |
if(deblock_left){ |
1435 |
XCHG(top_border_m1+8, src_y -7, 1);
|
|
1459 |
XCHG(top_border_m1+8*h->pixel_size, src_y -7*h->pixel_size, 1);
|
|
1436 | 1460 |
} |
1437 |
XCHG(top_border+0, src_y +1, xchg);
|
|
1438 |
XCHG(top_border+8, src_y +9, 1);
|
|
1461 |
XCHG(top_border+0*h->pixel_size, src_y +1*h->pixel_size, xchg);
|
|
1462 |
XCHG(top_border+8*h->pixel_size, src_y +9*h->pixel_size, 1);
|
|
1439 | 1463 |
if(s->mb_x+1 < s->mb_width){ |
1440 |
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1); |
|
1464 |
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17*h->pixel_size, 1);
|
|
1441 | 1465 |
} |
1442 | 1466 |
} |
1443 |
|
|
1444 | 1467 |
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
1445 | 1468 |
if(deblock_top){ |
1446 | 1469 |
if(deblock_left){ |
1447 |
XCHG(top_border_m1+16, src_cb -7, 1);
|
|
1448 |
XCHG(top_border_m1+24, src_cr -7, 1);
|
|
1470 |
XCHG(top_border_m1+16*h->pixel_size, src_cb -7*h->pixel_size, 1);
|
|
1471 |
XCHG(top_border_m1+24*h->pixel_size, src_cr -7*h->pixel_size, 1);
|
|
1449 | 1472 |
} |
1450 |
XCHG(top_border+16, src_cb+1, 1);
|
|
1451 |
XCHG(top_border+24, src_cr+1, 1);
|
|
1473 |
XCHG(top_border+16*h->pixel_size, src_cb+h->pixel_size, 1);
|
|
1474 |
XCHG(top_border+24*h->pixel_size, src_cr+h->pixel_size, 1);
|
|
1452 | 1475 |
} |
1453 | 1476 |
} |
1454 | 1477 |
} |
1455 | 1478 |
|
1479 |
/**
 * Read a single coefficient from a coefficient buffer.
 *
 * The element width is selected by h->pixel_size: when it equals 1 the
 * buffer is indexed as DCTELEM; otherwise the same storage is viewed as an
 * array of int32_t and indexed in int32_t units.
 *
 * @param h     decoder context; only pixel_size is read here
 * @param mb    start of the coefficient buffer
 * @param index element index, counted in units of the selected width
 * @return the coefficient stored at that index
 */
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) {
    const int wide = h->pixel_size != 1;

    return wide ? ((int32_t*)mb)[index] : mb[index];
}
|
1485 |
|
|
1486 |
/**
 * Store a single coefficient into a coefficient buffer.
 *
 * The element width is selected by h->pixel_size: when it equals 1 the
 * value is written through the DCTELEM pointer; otherwise the same storage
 * is viewed as an array of int32_t and the value is written at that width.
 *
 * @param h     decoder context; only pixel_size is read here
 * @param mb    start of the coefficient buffer
 * @param index element index, counted in units of the selected width
 * @param value coefficient to store
 */
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) {
    if (h->pixel_size != 1) {
        ((int32_t*)mb)[index] = value;
        return;
    }
    mb[index] = value;
}
|
1492 |
|
|
1456 | 1493 |
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ |
1457 | 1494 |
MpegEncContext * const s = &h->s; |
1458 | 1495 |
const int mb_x= s->mb_x; |
... | ... | |
1469 | 1506 |
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
1470 | 1507 |
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); |
1471 | 1508 |
|
1472 |
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; |
|
1473 |
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; |
|
1474 |
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; |
|
1509 |
dest_y = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize ) * 16;
|
|
1510 |
dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
|
|
1511 |
dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
|
|
1475 | 1512 |
|
1476 |
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4); |
|
1477 |
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2); |
|
1513 |
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64*h->pixel_size, s->linesize, 4);
|
|
1514 |
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64*h->pixel_size, dest_cr - dest_cb, 2);
|
|
1478 | 1515 |
|
1479 | 1516 |
h->list_counts[mb_xy]= h->list_count; |
1480 | 1517 |
|
... | ... | |
1511 | 1548 |
} |
1512 | 1549 |
|
1513 | 1550 |
if (!simple && IS_INTRA_PCM(mb_type)) { |
1551 |
if (h->pixel_size == 2) { |
|
1552 |
const int bit_depth = h->sps.bit_depth_luma; |
|
1553 |
int j; |
|
1554 |
GetBitContext gb; |
|
1555 |
init_get_bits(&gb, (uint8_t*)h->mb, 384*bit_depth); |
|
1556 |
|
|
1557 |
for (i = 0; i < 16; i++) { |
|
1558 |
uint16_t *tmp_y = (uint16_t*)(dest_y + i*linesize); |
|
1559 |
for (j = 0; j < 16; j++) |
|
1560 |
tmp_y[j] = get_bits(&gb, bit_depth); |
|
1561 |
} |
|
1562 |
for (i = 0; i < 8; i++) { |
|
1563 |
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); |
|
1564 |
for (j = 0; j < 8; j++) |
|
1565 |
tmp_cb[j] = get_bits(&gb, bit_depth); |
|
1566 |
} |
|
1567 |
for (i = 0; i < 8; i++) { |
|
1568 |
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); |
|
1569 |
for (j = 0; j < 8; j++) |
|
1570 |
tmp_cr[j] = get_bits(&gb, bit_depth); |
|
1571 |
} |
|
1572 |
} else { |
|
1514 | 1573 |
for (i=0; i<16; i++) { |
1515 | 1574 |
memcpy(dest_y + i* linesize, h->mb + i*8, 16); |
1516 | 1575 |
} |
... | ... | |
1518 | 1577 |
memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8); |
1519 | 1578 |
memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8); |
1520 | 1579 |
} |
1580 |
} |
|
1521 | 1581 |
} else { |
1522 | 1582 |
if(IS_INTRA(mb_type)){ |
1523 | 1583 |
if(h->deblocking_filter) |
... | ... | |
1542 | 1602 |
uint8_t * const ptr= dest_y + block_offset[i]; |
1543 | 1603 |
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; |
1544 | 1604 |
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1545 |
h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize); |
|
1605 |
h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
|
|
1546 | 1606 |
}else{ |
1547 | 1607 |
const int nnz = h->non_zero_count_cache[ scan8[i] ]; |
1548 | 1608 |
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, |
1549 | 1609 |
(h->topright_samples_available<<i)&0x4000, linesize); |
1550 | 1610 |
if(nnz){ |
1551 |
if(nnz == 1 && h->mb[i*16])
|
|
1552 |
idct_dc_add(ptr, h->mb + i*16, linesize); |
|
1611 |
if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
|
|
1612 |
idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
|
|
1553 | 1613 |
else |
1554 |
idct_add (ptr, h->mb + i*16, linesize); |
|
1614 |
idct_add (ptr, h->mb + i*16*h->pixel_size, linesize);
|
|
1555 | 1615 |
} |
1556 | 1616 |
} |
1557 | 1617 |
} |
... | ... | |
1568 | 1628 |
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; |
1569 | 1629 |
|
1570 | 1630 |
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1571 |
h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize); |
|
1631 |
h->hpc.pred4x4_add[dir](ptr, h->mb + i*16*h->pixel_size, linesize);
|
|
1572 | 1632 |
}else{ |
1573 | 1633 |
uint8_t *topright; |
1574 | 1634 |
int nnz, tr; |
1635 |
uint64_t tr_high; |
|
1575 | 1636 |
if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){ |
1576 | 1637 |
const int topright_avail= (h->topright_samples_available<<i)&0x8000; |
1577 | 1638 |
assert(mb_y || linesize <= block_offset[i]); |
1578 | 1639 |
if(!topright_avail){ |
1640 |
if (h->pixel_size == 2) { |
|
1641 |
tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; |
|
1642 |
topright= (uint8_t*) &tr_high; |
|
1643 |
} else { |
|
1579 | 1644 |
tr= ptr[3 - linesize]*0x01010101; |
1580 | 1645 |
topright= (uint8_t*) &tr; |
1646 |
} |
|
1581 | 1647 |
}else |
1582 |
topright= ptr + 4 - linesize; |
|
1648 |
topright= ptr + 4*h->pixel_size - linesize;
|
|
1583 | 1649 |
}else |
1584 | 1650 |
topright= NULL; |
1585 | 1651 |
|
... | ... | |
1587 | 1653 |
nnz = h->non_zero_count_cache[ scan8[i] ]; |
1588 | 1654 |
if(nnz){ |
1589 | 1655 |
if(is_h264){ |
1590 |
if(nnz == 1 && h->mb[i*16])
|
|
1591 |
idct_dc_add(ptr, h->mb + i*16, linesize); |
|
1656 |
if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
|
|
1657 |
idct_dc_add(ptr, h->mb + i*16*h->pixel_size, linesize);
|
|
1592 | 1658 |
else |
1593 |
idct_add (ptr, h->mb + i*16, linesize); |
|
1659 |
idct_add (ptr, h->mb + i*16*h->pixel_size, linesize);
|
|
1594 | 1660 |
} |
1595 | 1661 |
#if CONFIG_SVQ3_DECODER |
1596 | 1662 |
else |
... | ... | |
1611 | 1677 |
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, |
1612 | 1678 |
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; |
1613 | 1679 |
for(i = 0; i < 16; i++) |
1614 |
h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
|
|
1680 |
dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i));
|
|
1615 | 1681 |
} |
1616 | 1682 |
} |
1617 | 1683 |
} |
... | ... | |
1638 | 1704 |
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); |
1639 | 1705 |
}else{ |
1640 | 1706 |
for(i=0; i<16; i++){ |
1641 |
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
|
|
1642 |
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize); |
|
1707 |
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
|
|
1708 |
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
|
|
1643 | 1709 |
} |
1644 | 1710 |
} |
1645 | 1711 |
}else{ |
... | ... | |
1651 | 1717 |
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; |
1652 | 1718 |
for(i=0; i<16; i+=di){ |
1653 | 1719 |
if(h->non_zero_count_cache[ scan8[i] ]){ |
1654 |
idct_add(dest_y + block_offset[i], h->mb + i*16, linesize); |
|
1720 |
idct_add(dest_y + block_offset[i], h->mb + i*16*h->pixel_size, linesize);
|
|
1655 | 1721 |
} |
1656 | 1722 |
} |
1657 | 1723 |
}else{ |
... | ... | |
1679 | 1745 |
uint8_t *dest[2] = {dest_cb, dest_cr}; |
1680 | 1746 |
if(transform_bypass){ |
1681 | 1747 |
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ |
1682 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize); |
|
1683 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize); |
|
1748 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16*h->pixel_size, uvlinesize);
|
|
1749 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16*h->pixel_size, uvlinesize);
|
|
1684 | 1750 |
}else{ |
1685 | 1751 |
idct_add = s->dsp.add_pixels4; |
1686 | 1752 |
for(i=16; i<16+8; i++){ |
1687 |
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
|
|
1688 |
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize); |
|
1753 |
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
|
|
1754 |
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16*h->pixel_size, uvlinesize);
|
|
1689 | 1755 |
} |
1690 | 1756 |
} |
1691 | 1757 |
}else{ |
1692 | 1758 |
if(is_h264){ |
1693 | 1759 |
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) |
1694 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); |
|
1760 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16*h->pixel_size , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
|
1695 | 1761 |
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) |
1696 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
|
1762 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16+4*16)*h->pixel_size, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
|
1697 | 1763 |
h->h264dsp.h264_idct_add8(dest, block_offset, |
1698 | 1764 |
h->mb, uvlinesize, |
1699 | 1765 |
h->non_zero_count_cache); |
... | ... | |
2906 | 2972 |
|
2907 | 2973 |
s->mb_x= mb_x; |
2908 | 2974 |
s->mb_y= mb_y; |
2909 |
dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16; |
|
2910 |
dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8; |
|
2911 |
dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8; |
|
2975 |
dest_y = s->current_picture.data[0] + (mb_x*h->pixel_size + mb_y * s->linesize ) * 16;
|
|
2976 |
dest_cb = s->current_picture.data[1] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
|
|
2977 |
dest_cr = s->current_picture.data[2] + (mb_x*h->pixel_size + mb_y * s->uvlinesize) * 8;
|
|
2912 | 2978 |
//FIXME simplify above |
2913 | 2979 |
|
2914 | 2980 |
if (MB_FIELD) { |
Also available in: Unified diff