Revision df2d5b16
libavcodec/h264.c | ||
---|---|---|
996 | 996 |
} |
997 | 997 |
} |
998 | 998 |
|
999 |
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){ |
|
999 |
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple, int pixel_shift){
|
|
1000 | 1000 |
MpegEncContext * const s = &h->s; |
1001 | 1001 |
int deblock_left; |
1002 | 1002 |
int deblock_top; |
... | ... | |
1021 | 1021 |
deblock_top = (s->mb_y > !!MB_FIELD); |
1022 | 1022 |
} |
1023 | 1023 |
|
1024 |
src_y -= linesize + 1 + h->pixel_shift;
|
|
1025 |
src_cb -= uvlinesize + 1 + h->pixel_shift;
|
|
1026 |
src_cr -= uvlinesize + 1 + h->pixel_shift;
|
|
1024 |
src_y -= linesize + 1 + pixel_shift; |
|
1025 |
src_cb -= uvlinesize + 1 + pixel_shift; |
|
1026 |
src_cr -= uvlinesize + 1 + pixel_shift; |
|
1027 | 1027 |
|
1028 | 1028 |
top_border_m1 = h->top_borders[top_idx][s->mb_x-1]; |
1029 | 1029 |
top_border = h->top_borders[top_idx][s->mb_x]; |
1030 | 1030 |
|
1031 | 1031 |
#define XCHG(a,b,xchg)\ |
1032 |
if (h->pixel_shift) {\
|
|
1032 |
if (pixel_shift) {\ |
|
1033 | 1033 |
if (xchg) {\ |
1034 | 1034 |
AV_SWAP64(b+0,a+0);\ |
1035 | 1035 |
AV_SWAP64(b+8,a+8);\ |
... | ... | |
1042 | 1042 |
|
1043 | 1043 |
if(deblock_top){ |
1044 | 1044 |
if(deblock_left){ |
1045 |
XCHG(top_border_m1+(8<<h->pixel_shift), src_y -(7<<h->pixel_shift), 1);
|
|
1045 |
XCHG(top_border_m1+(8<<pixel_shift), src_y -(7<<h->pixel_shift), 1); |
|
1046 | 1046 |
} |
1047 |
XCHG(top_border+(0<<h->pixel_shift), src_y +(1<<h->pixel_shift), xchg);
|
|
1048 |
XCHG(top_border+(8<<h->pixel_shift), src_y +(9<<h->pixel_shift), 1);
|
|
1047 |
XCHG(top_border+(0<<pixel_shift), src_y +(1<<pixel_shift), xchg);
|
|
1048 |
XCHG(top_border+(8<<pixel_shift), src_y +(9<<pixel_shift), 1);
|
|
1049 | 1049 |
if(s->mb_x+1 < s->mb_width){ |
1050 |
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<h->pixel_shift), 1);
|
|
1050 |
XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<pixel_shift), 1); |
|
1051 | 1051 |
} |
1052 | 1052 |
} |
1053 | 1053 |
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
1054 | 1054 |
if(deblock_top){ |
1055 | 1055 |
if(deblock_left){ |
1056 |
XCHG(top_border_m1+(16<<h->pixel_shift), src_cb -(7<<h->pixel_shift), 1);
|
|
1057 |
XCHG(top_border_m1+(24<<h->pixel_shift), src_cr -(7<<h->pixel_shift), 1);
|
|
1056 |
XCHG(top_border_m1+(16<<pixel_shift), src_cb -(7<<pixel_shift), 1);
|
|
1057 |
XCHG(top_border_m1+(24<<pixel_shift), src_cr -(7<<pixel_shift), 1);
|
|
1058 | 1058 |
} |
1059 |
XCHG(top_border+(16<<h->pixel_shift), src_cb+1+h->pixel_shift, 1);
|
|
1060 |
XCHG(top_border+(24<<h->pixel_shift), src_cr+1+h->pixel_shift, 1);
|
|
1059 |
XCHG(top_border+(16<<pixel_shift), src_cb+1+pixel_shift, 1);
|
|
1060 |
XCHG(top_border+(24<<pixel_shift), src_cr+1+pixel_shift, 1);
|
|
1061 | 1061 |
} |
1062 | 1062 |
} |
1063 | 1063 |
} |
1064 | 1064 |
|
1065 |
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) { |
|
1066 |
if (!h->pixel_shift)
|
|
1065 |
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index, int pixel_shift) {
|
|
1066 |
if (!pixel_shift) |
|
1067 | 1067 |
return mb[index]; |
1068 | 1068 |
else |
1069 | 1069 |
return ((int32_t*)mb)[index]; |
1070 | 1070 |
} |
1071 | 1071 |
|
1072 |
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) { |
|
1073 |
if (!h->pixel_shift)
|
|
1072 |
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value, int pixel_shift) {
|
|
1073 |
if (!pixel_shift) |
|
1074 | 1074 |
mb[index] = value; |
1075 | 1075 |
else |
1076 | 1076 |
((int32_t*)mb)[index] = value; |
1077 | 1077 |
} |
1078 | 1078 |
|
1079 |
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){ |
|
1079 |
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
|
|
1080 | 1080 |
MpegEncContext * const s = &h->s; |
1081 | 1081 |
const int mb_x= s->mb_x; |
1082 | 1082 |
const int mb_y= s->mb_y; |
... | ... | |
1092 | 1092 |
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); |
1093 | 1093 |
void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride); |
1094 | 1094 |
|
1095 |
dest_y = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize ) * 16;
|
|
1096 |
dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
|
|
1097 |
dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
|
|
1095 |
dest_y = s->current_picture.data[0] + ((mb_x<<pixel_shift) + mb_y * s->linesize ) * 16; |
|
1096 |
dest_cb = s->current_picture.data[1] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8; |
|
1097 |
dest_cr = s->current_picture.data[2] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8; |
|
1098 | 1098 |
|
1099 |
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<h->pixel_shift), s->linesize, 4);
|
|
1100 |
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<h->pixel_shift), dest_cr - dest_cb, 2);
|
|
1099 |
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<pixel_shift), s->linesize, 4); |
|
1100 |
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<pixel_shift), dest_cr - dest_cb, 2); |
|
1101 | 1101 |
|
1102 | 1102 |
h->list_counts[mb_xy]= h->list_count; |
1103 | 1103 |
|
... | ... | |
1134 | 1134 |
} |
1135 | 1135 |
|
1136 | 1136 |
if (!simple && IS_INTRA_PCM(mb_type)) { |
1137 |
if (h->pixel_shift) {
|
|
1137 |
if (pixel_shift) { |
|
1138 | 1138 |
const int bit_depth = h->sps.bit_depth_luma; |
1139 | 1139 |
int j; |
1140 | 1140 |
GetBitContext gb; |
... | ... | |
1167 | 1167 |
} else { |
1168 | 1168 |
if(IS_INTRA(mb_type)){ |
1169 | 1169 |
if(h->deblocking_filter) |
1170 |
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple); |
|
1170 |
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
|
|
1171 | 1171 |
|
1172 | 1172 |
if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){ |
1173 | 1173 |
h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize); |
... | ... | |
1188 | 1188 |
uint8_t * const ptr= dest_y + block_offset[i]; |
1189 | 1189 |
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; |
1190 | 1190 |
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1191 |
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1191 |
h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize); |
|
1192 | 1192 |
}else{ |
1193 | 1193 |
const int nnz = h->non_zero_count_cache[ scan8[i] ]; |
1194 | 1194 |
h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000, |
1195 | 1195 |
(h->topright_samples_available<<i)&0x4000, linesize); |
1196 | 1196 |
if(nnz){ |
1197 |
if(nnz == 1 && dctcoef_get(h, h->mb, i*16)) |
|
1198 |
idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1197 |
if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
|
|
1198 |
idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize); |
|
1199 | 1199 |
else |
1200 |
idct_add (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1200 |
idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize); |
|
1201 | 1201 |
} |
1202 | 1202 |
} |
1203 | 1203 |
} |
... | ... | |
1214 | 1214 |
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ]; |
1215 | 1215 |
|
1216 | 1216 |
if(transform_bypass && h->sps.profile_idc==244 && dir<=1){ |
1217 |
h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1217 |
h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize); |
|
1218 | 1218 |
}else{ |
1219 | 1219 |
uint8_t *topright; |
1220 | 1220 |
int nnz, tr; |
... | ... | |
1223 | 1223 |
const int topright_avail= (h->topright_samples_available<<i)&0x8000; |
1224 | 1224 |
assert(mb_y || linesize <= block_offset[i]); |
1225 | 1225 |
if(!topright_avail){ |
1226 |
if (h->pixel_shift) {
|
|
1226 |
if (pixel_shift) { |
|
1227 | 1227 |
tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL; |
1228 | 1228 |
topright= (uint8_t*) &tr_high; |
1229 | 1229 |
} else { |
... | ... | |
1231 | 1231 |
topright= (uint8_t*) &tr; |
1232 | 1232 |
} |
1233 | 1233 |
}else |
1234 |
topright= ptr + (4<<h->pixel_shift) - linesize;
|
|
1234 |
topright= ptr + (4<<pixel_shift) - linesize; |
|
1235 | 1235 |
}else |
1236 | 1236 |
topright= NULL; |
1237 | 1237 |
|
... | ... | |
1239 | 1239 |
nnz = h->non_zero_count_cache[ scan8[i] ]; |
1240 | 1240 |
if(nnz){ |
1241 | 1241 |
if(is_h264){ |
1242 |
if(nnz == 1 && dctcoef_get(h, h->mb, i*16)) |
|
1243 |
idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1242 |
if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
|
|
1243 |
idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize); |
|
1244 | 1244 |
else |
1245 |
idct_add (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1245 |
idct_add (ptr, h->mb + (i*16<<pixel_shift), linesize); |
|
1246 | 1246 |
} |
1247 | 1247 |
#if CONFIG_SVQ3_DECODER |
1248 | 1248 |
else |
... | ... | |
1263 | 1263 |
static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16, |
1264 | 1264 |
8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16}; |
1265 | 1265 |
for(i = 0; i < 16; i++) |
1266 |
dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i));
|
|
1266 |
dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i,pixel_shift),pixel_shift);
|
|
1267 | 1267 |
} |
1268 | 1268 |
} |
1269 | 1269 |
} |
... | ... | |
1273 | 1273 |
#endif |
1274 | 1274 |
} |
1275 | 1275 |
if(h->deblocking_filter) |
1276 |
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple); |
|
1276 |
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
|
|
1277 | 1277 |
}else if(is_h264){ |
1278 | 1278 |
ff_hl_motion(h, dest_y, dest_cb, dest_cr, |
1279 | 1279 |
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, |
... | ... | |
1290 | 1290 |
h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize); |
1291 | 1291 |
}else{ |
1292 | 1292 |
for(i=0; i<16; i++){ |
1293 |
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16)) |
|
1294 |
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1293 |
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
|
|
1294 |
s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize); |
|
1295 | 1295 |
} |
1296 | 1296 |
} |
1297 | 1297 |
}else{ |
... | ... | |
1303 | 1303 |
idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4; |
1304 | 1304 |
for(i=0; i<16; i+=di){ |
1305 | 1305 |
if(h->non_zero_count_cache[ scan8[i] ]){ |
1306 |
idct_add(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
|
|
1306 |
idct_add(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize); |
|
1307 | 1307 |
} |
1308 | 1308 |
} |
1309 | 1309 |
}else{ |
... | ... | |
1331 | 1331 |
uint8_t *dest[2] = {dest_cb, dest_cr}; |
1332 | 1332 |
if(transform_bypass){ |
1333 | 1333 |
if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){ |
1334 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<h->pixel_shift), uvlinesize);
|
|
1335 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<h->pixel_shift), uvlinesize);
|
|
1334 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<pixel_shift), uvlinesize); |
|
1335 |
h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<pixel_shift), uvlinesize); |
|
1336 | 1336 |
}else{ |
1337 | 1337 |
idct_add = s->dsp.add_pixels4; |
1338 | 1338 |
for(i=16; i<16+8; i++){ |
1339 |
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16)) |
|
1340 |
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<h->pixel_shift), uvlinesize);
|
|
1339 |
if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
|
|
1340 |
idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<pixel_shift), uvlinesize); |
|
1341 | 1341 |
} |
1342 | 1342 |
} |
1343 | 1343 |
}else{ |
1344 | 1344 |
if(is_h264){ |
1345 | 1345 |
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) |
1346 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<h->pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
|
|
1346 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<pixel_shift) , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); |
|
1347 | 1347 |
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) |
1348 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<h->pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
|
|
1348 |
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); |
|
1349 | 1349 |
h->h264dsp.h264_idct_add8(dest, block_offset, |
1350 | 1350 |
h->mb, uvlinesize, |
1351 | 1351 |
h->non_zero_count_cache); |
... | ... | |
1372 | 1372 |
/** |
1373 | 1373 |
* Process a macroblock; this case avoids checks for expensive uncommon cases. |
1374 | 1374 |
*/ |
1375 |
static void hl_decode_mb_simple(H264Context *h){ |
|
1376 |
hl_decode_mb_internal(h, 1); |
|
1375 |
static void hl_decode_mb_simple8(H264Context *h){
|
|
1376 |
hl_decode_mb_internal(h, 1, 0);
|
|
1377 | 1377 |
} |
1378 | 1378 |
|
1379 | 1379 |
/** |
1380 | 1380 |
* Process a macroblock; this handles edge cases, such as interlacing. |
1381 | 1381 |
*/ |
1382 | 1382 |
static void av_noinline hl_decode_mb_complex(H264Context *h){ |
1383 |
hl_decode_mb_internal(h, 0); |
|
1383 |
hl_decode_mb_internal(h, 0, h->pixel_shift);
|
|
1384 | 1384 |
} |
1385 | 1385 |
|
1386 | 1386 |
void ff_h264_hl_decode_mb(H264Context *h){ |
... | ... | |
1389 | 1389 |
const int mb_type= s->current_picture.mb_type[mb_xy]; |
1390 | 1390 |
int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0; |
1391 | 1391 |
|
1392 |
if (is_complex) |
|
1392 |
if (is_complex || h->pixel_shift)
|
|
1393 | 1393 |
hl_decode_mb_complex(h); |
1394 |
else hl_decode_mb_simple(h); |
|
1394 |
else{ |
|
1395 |
hl_decode_mb_simple8(h); |
|
1396 |
} |
|
1395 | 1397 |
} |
1396 | 1398 |
|
1397 | 1399 |
static int pred_weight_table(H264Context *h){ |
Also available in: Unified diff