Revision df2d5b16

View differences:

libavcodec/h264.c
996 996
    }
997 997
}
998 998

  
999
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
999
static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple, int pixel_shift){
1000 1000
    MpegEncContext * const s = &h->s;
1001 1001
    int deblock_left;
1002 1002
    int deblock_top;
......
1021 1021
        deblock_top =  (s->mb_y > !!MB_FIELD);
1022 1022
    }
1023 1023

  
1024
    src_y  -=   linesize + 1 + h->pixel_shift;
1025
    src_cb -= uvlinesize + 1 + h->pixel_shift;
1026
    src_cr -= uvlinesize + 1 + h->pixel_shift;
1024
    src_y  -=   linesize + 1 + pixel_shift;
1025
    src_cb -= uvlinesize + 1 + pixel_shift;
1026
    src_cr -= uvlinesize + 1 + pixel_shift;
1027 1027

  
1028 1028
    top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1029 1029
    top_border    = h->top_borders[top_idx][s->mb_x];
1030 1030

  
1031 1031
#define XCHG(a,b,xchg)\
1032
    if (h->pixel_shift) {\
1032
    if (pixel_shift) {\
1033 1033
        if (xchg) {\
1034 1034
            AV_SWAP64(b+0,a+0);\
1035 1035
            AV_SWAP64(b+8,a+8);\
......
1042 1042

  
1043 1043
    if(deblock_top){
1044 1044
        if(deblock_left){
1045
            XCHG(top_border_m1+(8<<h->pixel_shift), src_y -(7<<h->pixel_shift), 1);
1045
            XCHG(top_border_m1+(8<<pixel_shift), src_y -(7<<pixel_shift), 1);
1046 1046
        }
1047
        XCHG(top_border+(0<<h->pixel_shift), src_y +(1<<h->pixel_shift), xchg);
1048
        XCHG(top_border+(8<<h->pixel_shift), src_y +(9<<h->pixel_shift), 1);
1047
        XCHG(top_border+(0<<pixel_shift), src_y +(1<<pixel_shift), xchg);
1048
        XCHG(top_border+(8<<pixel_shift), src_y +(9<<pixel_shift), 1);
1049 1049
        if(s->mb_x+1 < s->mb_width){
1050
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<h->pixel_shift), 1);
1050
            XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +(17<<pixel_shift), 1);
1051 1051
        }
1052 1052
    }
1053 1053
    if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1054 1054
        if(deblock_top){
1055 1055
            if(deblock_left){
1056
                XCHG(top_border_m1+(16<<h->pixel_shift), src_cb -(7<<h->pixel_shift), 1);
1057
                XCHG(top_border_m1+(24<<h->pixel_shift), src_cr -(7<<h->pixel_shift), 1);
1056
                XCHG(top_border_m1+(16<<pixel_shift), src_cb -(7<<pixel_shift), 1);
1057
                XCHG(top_border_m1+(24<<pixel_shift), src_cr -(7<<pixel_shift), 1);
1058 1058
            }
1059
            XCHG(top_border+(16<<h->pixel_shift), src_cb+1+h->pixel_shift, 1);
1060
            XCHG(top_border+(24<<h->pixel_shift), src_cr+1+h->pixel_shift, 1);
1059
            XCHG(top_border+(16<<pixel_shift), src_cb+1+pixel_shift, 1);
1060
            XCHG(top_border+(24<<pixel_shift), src_cr+1+pixel_shift, 1);
1061 1061
        }
1062 1062
    }
1063 1063
}
1064 1064

  
1065
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index) {
1066
    if (!h->pixel_shift)
1065
static av_always_inline int dctcoef_get(H264Context *h, DCTELEM *mb, int index, int pixel_shift) {
1066
    if (!pixel_shift)
1067 1067
        return mb[index];
1068 1068
    else
1069 1069
        return ((int32_t*)mb)[index];
1070 1070
}
1071 1071

  
1072
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value) {
1073
    if (!h->pixel_shift)
1072
static av_always_inline void dctcoef_set(H264Context *h, DCTELEM *mb, int index, int value, int pixel_shift) {
1073
    if (!pixel_shift)
1074 1074
        mb[index] = value;
1075 1075
    else
1076 1076
        ((int32_t*)mb)[index] = value;
1077 1077
}
1078 1078

  
1079
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
1079
static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, int pixel_shift){
1080 1080
    MpegEncContext * const s = &h->s;
1081 1081
    const int mb_x= s->mb_x;
1082 1082
    const int mb_y= s->mb_y;
......
1092 1092
    void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1093 1093
    void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
1094 1094

  
1095
    dest_y  = s->current_picture.data[0] + ((mb_x<<h->pixel_shift) + mb_y * s->linesize  ) * 16;
1096
    dest_cb = s->current_picture.data[1] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
1097
    dest_cr = s->current_picture.data[2] + ((mb_x<<h->pixel_shift) + mb_y * s->uvlinesize) * 8;
1095
    dest_y  = s->current_picture.data[0] + ((mb_x<<pixel_shift) + mb_y * s->linesize  ) * 16;
1096
    dest_cb = s->current_picture.data[1] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8;
1097
    dest_cr = s->current_picture.data[2] + ((mb_x<<pixel_shift) + mb_y * s->uvlinesize) * 8;
1098 1098

  
1099
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<h->pixel_shift), s->linesize, 4);
1100
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<h->pixel_shift), dest_cr - dest_cb, 2);
1099
    s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64<<pixel_shift), s->linesize, 4);
1100
    s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64<<pixel_shift), dest_cr - dest_cb, 2);
1101 1101

  
1102 1102
    h->list_counts[mb_xy]= h->list_count;
1103 1103

  
......
1134 1134
    }
1135 1135

  
1136 1136
    if (!simple && IS_INTRA_PCM(mb_type)) {
1137
        if (h->pixel_shift) {
1137
        if (pixel_shift) {
1138 1138
            const int bit_depth = h->sps.bit_depth_luma;
1139 1139
            int j;
1140 1140
            GetBitContext gb;
......
1167 1167
    } else {
1168 1168
        if(IS_INTRA(mb_type)){
1169 1169
            if(h->deblocking_filter)
1170
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
1170
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple, pixel_shift);
1171 1171

  
1172 1172
            if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1173 1173
                h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
......
1188 1188
                            uint8_t * const ptr= dest_y + block_offset[i];
1189 1189
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1190 1190
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1191
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1191
                                h->hpc.pred8x8l_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize);
1192 1192
                            }else{
1193 1193
                                const int nnz = h->non_zero_count_cache[ scan8[i] ];
1194 1194
                                h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1195 1195
                                                            (h->topright_samples_available<<i)&0x4000, linesize);
1196 1196
                                if(nnz){
1197
                                    if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
1198
                                        idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1197
                                    if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
1198
                                        idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize);
1199 1199
                                    else
1200
                                        idct_add   (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1200
                                        idct_add   (ptr, h->mb + (i*16<<pixel_shift), linesize);
1201 1201
                                }
1202 1202
                            }
1203 1203
                        }
......
1214 1214
                            const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1215 1215

  
1216 1216
                            if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1217
                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1217
                                h->hpc.pred4x4_add[dir](ptr, h->mb + (i*16<<pixel_shift), linesize);
1218 1218
                            }else{
1219 1219
                                uint8_t *topright;
1220 1220
                                int nnz, tr;
......
1223 1223
                                    const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1224 1224
                                    assert(mb_y || linesize <= block_offset[i]);
1225 1225
                                    if(!topright_avail){
1226
                                        if (h->pixel_shift) {
1226
                                        if (pixel_shift) {
1227 1227
                                            tr_high= ((uint16_t*)ptr)[3 - linesize/2]*0x0001000100010001ULL;
1228 1228
                                            topright= (uint8_t*) &tr_high;
1229 1229
                                        } else {
......
1231 1231
                                        topright= (uint8_t*) &tr;
1232 1232
                                        }
1233 1233
                                    }else
1234
                                        topright= ptr + (4<<h->pixel_shift) - linesize;
1234
                                        topright= ptr + (4<<pixel_shift) - linesize;
1235 1235
                                }else
1236 1236
                                    topright= NULL;
1237 1237

  
......
1239 1239
                                nnz = h->non_zero_count_cache[ scan8[i] ];
1240 1240
                                if(nnz){
1241 1241
                                    if(is_h264){
1242
                                        if(nnz == 1 && dctcoef_get(h, h->mb, i*16))
1243
                                            idct_dc_add(ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1242
                                        if(nnz == 1 && dctcoef_get(h, h->mb, i*16, pixel_shift))
1243
                                            idct_dc_add(ptr, h->mb + (i*16<<pixel_shift), linesize);
1244 1244
                                        else
1245
                                            idct_add   (ptr, h->mb + (i*16<<h->pixel_shift), linesize);
1245
                                            idct_add   (ptr, h->mb + (i*16<<pixel_shift), linesize);
1246 1246
                                    }
1247 1247
#if CONFIG_SVQ3_DECODER
1248 1248
                                    else
......
1263 1263
                            static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
1264 1264
                                                                    8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
1265 1265
                            for(i = 0; i < 16; i++)
1266
                                dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i));
1266
                                dctcoef_set(h, h->mb, dc_mapping[i], dctcoef_get(h, h->mb_luma_dc, i,pixel_shift),pixel_shift);
1267 1267
                        }
1268 1268
                    }
1269 1269
                }
......
1273 1273
#endif
1274 1274
            }
1275 1275
            if(h->deblocking_filter)
1276
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
1276
                xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple, pixel_shift);
1277 1277
        }else if(is_h264){
1278 1278
            ff_hl_motion(h, dest_y, dest_cb, dest_cr,
1279 1279
                      s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
......
1290 1290
                            h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1291 1291
                        }else{
1292 1292
                            for(i=0; i<16; i++){
1293
                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
1294
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
1293
                                if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
1294
                                    s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize);
1295 1295
                            }
1296 1296
                        }
1297 1297
                    }else{
......
1303 1303
                        idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
1304 1304
                        for(i=0; i<16; i+=di){
1305 1305
                            if(h->non_zero_count_cache[ scan8[i] ]){
1306
                                idct_add(dest_y + block_offset[i], h->mb + (i*16<<h->pixel_shift), linesize);
1306
                                idct_add(dest_y + block_offset[i], h->mb + (i*16<<pixel_shift), linesize);
1307 1307
                            }
1308 1308
                        }
1309 1309
                    }else{
......
1331 1331
            uint8_t *dest[2] = {dest_cb, dest_cr};
1332 1332
            if(transform_bypass){
1333 1333
                if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1334
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<h->pixel_shift), uvlinesize);
1335
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<h->pixel_shift), uvlinesize);
1334
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + (16*16<<pixel_shift), uvlinesize);
1335
                    h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + (20*16<<pixel_shift), uvlinesize);
1336 1336
                }else{
1337 1337
                    idct_add = s->dsp.add_pixels4;
1338 1338
                    for(i=16; i<16+8; i++){
1339
                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16))
1340
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<h->pixel_shift), uvlinesize);
1339
                        if(h->non_zero_count_cache[ scan8[i] ] || dctcoef_get(h, h->mb, i*16,pixel_shift))
1340
                            idct_add   (dest[(i&4)>>2] + block_offset[i], h->mb + (i*16<<pixel_shift), uvlinesize);
1341 1341
                    }
1342 1342
                }
1343 1343
            }else{
1344 1344
                if(is_h264){
1345 1345
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
1346
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<h->pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1346
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16<<pixel_shift)       , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1347 1347
                    if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
1348
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<h->pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1348
                        h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + ((16*16+4*16)<<pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1349 1349
                    h->h264dsp.h264_idct_add8(dest, block_offset,
1350 1350
                                              h->mb, uvlinesize,
1351 1351
                                              h->non_zero_count_cache);
......
1372 1372
/**
1373 1373
 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1374 1374
 */
1375
static void hl_decode_mb_simple(H264Context *h){
1376
    hl_decode_mb_internal(h, 1);
1375
static void hl_decode_mb_simple8(H264Context *h){
1376
    hl_decode_mb_internal(h, 1, 0);
1377 1377
}
1378 1378

  
1379 1379
/**
1380 1380
 * Process a macroblock; this handles edge cases, such as interlacing.
1381 1381
 */
1382 1382
static void av_noinline hl_decode_mb_complex(H264Context *h){
1383
    hl_decode_mb_internal(h, 0);
1383
    hl_decode_mb_internal(h, 0, h->pixel_shift);
1384 1384
}
1385 1385

  
1386 1386
void ff_h264_hl_decode_mb(H264Context *h){
......
1389 1389
    const int mb_type= s->current_picture.mb_type[mb_xy];
1390 1390
    int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
1391 1391

  
1392
    if (is_complex)
1392
    if (is_complex || h->pixel_shift)
1393 1393
        hl_decode_mb_complex(h);
1394
    else hl_decode_mb_simple(h);
1394
    else{
1395
        hl_decode_mb_simple8(h);
1396
    }
1395 1397
}
1396 1398

  
1397 1399
static int pred_weight_table(H264Context *h){

Also available in: Unified diff