300 |
300 |
* is 64 if not available.
|
301 |
301 |
*/
|
302 |
302 |
DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
|
|
303 |
|
|
304 |
/*
|
|
305 |
.UU.YYYY
|
|
306 |
.UU.YYYY
|
|
307 |
.vv.YYYY
|
|
308 |
.VV.YYYY
|
|
309 |
*/
|
303 |
310 |
uint8_t (*non_zero_count)[32];
|
304 |
311 |
|
305 |
312 |
/**
|
... | ... | |
727 |
734 |
const uint8_t * left_block;
|
728 |
735 |
int topleft_partition= -1;
|
729 |
736 |
int i;
|
730 |
|
static const uint8_t left_block_options[4][8]={
|
731 |
|
{0,1,2,3,7,10,8,11},
|
732 |
|
{2,2,3,3,8,11,8,11},
|
733 |
|
{0,0,1,1,7,10,7,10},
|
734 |
|
{0,2,0,2,7,10,7,10}
|
|
737 |
static const uint8_t left_block_options[4][16]={
|
|
738 |
{0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
|
|
739 |
{2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
|
|
740 |
{0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
|
|
741 |
{0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
|
735 |
742 |
};
|
736 |
743 |
|
737 |
744 |
top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
|
... | ... | |
788 |
795 |
h->left_mb_xy[0] = left_xy[0];
|
789 |
796 |
h->left_mb_xy[1] = left_xy[1];
|
790 |
797 |
if(for_deblock){
|
|
798 |
*((uint64_t*)&h->non_zero_count_cache[0+8*1])= *((uint64_t*)&h->non_zero_count[mb_xy][ 0]);
|
|
799 |
*((uint64_t*)&h->non_zero_count_cache[0+8*2])= *((uint64_t*)&h->non_zero_count[mb_xy][ 8]);
|
|
800 |
*((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
|
|
801 |
*((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
|
|
802 |
*((uint64_t*)&h->non_zero_count_cache[0+8*4])= *((uint64_t*)&h->non_zero_count[mb_xy][24]);
|
|
803 |
|
791 |
804 |
topleft_type = 0;
|
792 |
805 |
topright_type = 0;
|
793 |
806 |
top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
|
... | ... | |
922 |
935 |
*/
|
923 |
936 |
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
|
924 |
937 |
if(top_type){
|
925 |
|
h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
|
926 |
|
h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
|
927 |
|
h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
|
928 |
|
h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
|
|
938 |
*(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
|
929 |
939 |
|
930 |
|
h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
|
931 |
|
h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
|
|
940 |
h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
|
|
941 |
h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
|
932 |
942 |
|
933 |
|
h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
|
934 |
|
h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
|
|
943 |
h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
|
|
944 |
h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
|
935 |
945 |
|
936 |
946 |
}else{
|
937 |
947 |
h->non_zero_count_cache[4+8*0]=
|
... | ... | |
949 |
959 |
|
950 |
960 |
for (i=0; i<2; i++) {
|
951 |
961 |
if(left_type[i]){
|
952 |
|
h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
|
953 |
|
h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
|
954 |
|
h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
|
955 |
|
h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
|
|
962 |
h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
|
|
963 |
h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
|
|
964 |
h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
|
|
965 |
h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
|
956 |
966 |
}else{
|
957 |
967 |
h->non_zero_count_cache[3+8*1 + 2*8*i]=
|
958 |
968 |
h->non_zero_count_cache[3+8*2 + 2*8*i]=
|
... | ... | |
1204 |
1214 |
/**
 * Copies non-zero-count values for the current macroblock (index h->mb_xy)
 * from h->non_zero_count_cache into h->non_zero_count[mb_xy].
 *
 * NOTE(review): this span is a rendered diff, so two revisions of the body
 * appear interleaved with line-number and column-separator residue.
 *  - The older revision assigns destination entries 0..23 one byte at a
 *    time, each from an individually chosen cache position.
 *  - The newer revision copies whole runs of the cache instead:
 *    8 bytes from cache offsets 8*1, 8*2 and 8*4 to destination offsets
 *    0, 8 and 24, and 4 bytes from cache offsets 8*5 and 4+8*3 to
 *    destination offsets 16 and 20.
 *
 * NOTE(review): the newer revision accesses uint8_t storage through
 * uint64_t / uint32_t lvalues; this presumably relies on both buffers being
 * suitably aligned and on the compiler tolerating the type punning --
 * confirm against the declarations and build flags.
 *
 * @param h decoder context; the visible code reads h->mb_xy and
 *          h->non_zero_count_cache and writes h->non_zero_count[mb_xy].
 */
static inline void write_back_non_zero_count(H264Context *h){
|
1205 |
1215 |
const int mb_xy= h->mb_xy;
|
1206 |
1216 |

1207 |
|
h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
|
1208 |
|
h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
|
1209 |
|
h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
|
1210 |
|
h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
|
1211 |
|
h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
|
1212 |
|
h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
|
1213 |
|
h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
|
1214 |
|

1215 |
|
h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
|
1216 |
|
h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
|
1217 |
|
h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
|
1218 |
|

1219 |
|
h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
|
1220 |
|
h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
|
1221 |
|
h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
|
1222 |
|

1223 |
|
//FIXME sort better how things are stored in non_zero_count
|
1224 |
|

1225 |
|

1226 |
|
h->non_zero_count[mb_xy][13]= h->non_zero_count_cache[6+8*1];
|
1227 |
|
h->non_zero_count[mb_xy][14]= h->non_zero_count_cache[6+8*2];
|
1228 |
|
h->non_zero_count[mb_xy][15]= h->non_zero_count_cache[6+8*3];
|
1229 |
|
h->non_zero_count[mb_xy][16]= h->non_zero_count_cache[5+8*1];
|
1230 |
|
h->non_zero_count[mb_xy][17]= h->non_zero_count_cache[5+8*2];
|
1231 |
|
h->non_zero_count[mb_xy][18]= h->non_zero_count_cache[5+8*3];
|
1232 |
|
h->non_zero_count[mb_xy][19]= h->non_zero_count_cache[4+8*1];
|
1233 |
|
h->non_zero_count[mb_xy][20]= h->non_zero_count_cache[4+8*2];
|
1234 |
|
h->non_zero_count[mb_xy][21]= h->non_zero_count_cache[4+8*3];
|
1235 |
|

1236 |
|
h->non_zero_count[mb_xy][22]= h->non_zero_count_cache[1+8*1];
|
1237 |
|
h->non_zero_count[mb_xy][23]= h->non_zero_count_cache[1+8*4];
|
1238 |
|

|
1217 |
/* Newer revision: copy contiguous runs of the cache in 8- and 4-byte units. */
*((uint64_t*)&h->non_zero_count[mb_xy][ 0]) = *((uint64_t*)&h->non_zero_count_cache[0+8*1]);
|
|
1218 |
*((uint64_t*)&h->non_zero_count[mb_xy][ 8]) = *((uint64_t*)&h->non_zero_count_cache[0+8*2]);
|
|
1219 |
*((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]);
|
|
1220 |
*((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]);
|
|
1221 |
*((uint64_t*)&h->non_zero_count[mb_xy][24]) = *((uint64_t*)&h->non_zero_count_cache[0+8*4]);
|
1239 |
1222 |
}
|
1240 |
1223 |
|
1241 |
1224 |
static inline void write_back_motion(H264Context *h, int mb_type){
|