300 
300 
* is 64 if not available.

301 
301 
*/

302 
302 
DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);


303 


304 
/*


305 
.UU.YYYY


306 
.UU.YYYY


307 
.VV.YYYY


308 
.VV.YYYY


309 
*/

303 
310 
uint8_t (*non_zero_count)[32];

304 
311 

305 
312 
/**

...  ...  
727 
734 
const uint8_t * left_block;

728 
735 
int topleft_partition= 1;

729 
736 
int i;

730 

static const uint8_t left_block_options[4][8]={

731 

{0,1,2,3,7,10,8,11},

732 

{2,2,3,3,8,11,8,11},

733 

{0,0,1,1,7,10,7,10},

734 

{0,2,0,2,7,10,7,10}


737 
static const uint8_t left_block_options[4][16]={


738 
{0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},


739 
{2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},


740 
{0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},


741 
{0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}

735 
742 
};

736 
743 

737 
744 
top_xy = mb_xy  (s>mb_stride << FIELD_PICTURE);

...  ...  
788 
795 
h>left_mb_xy[0] = left_xy[0];

789 
796 
h>left_mb_xy[1] = left_xy[1];

790 
797 
if(for_deblock){


798 
*((uint64_t*)&h>non_zero_count_cache[0+8*1])= *((uint64_t*)&h>non_zero_count[mb_xy][ 0]);


799 
*((uint64_t*)&h>non_zero_count_cache[0+8*2])= *((uint64_t*)&h>non_zero_count[mb_xy][ 8]);


800 
*((uint32_t*)&h>non_zero_count_cache[0+8*5])= *((uint32_t*)&h>non_zero_count[mb_xy][16]);


801 
*((uint32_t*)&h>non_zero_count_cache[4+8*3])= *((uint32_t*)&h>non_zero_count[mb_xy][20]);


802 
*((uint64_t*)&h>non_zero_count_cache[0+8*4])= *((uint64_t*)&h>non_zero_count[mb_xy][24]);


803 

791 
804 
topleft_type = 0;

792 
805 
topright_type = 0;

793 
806 
top_type = h>slice_table[top_xy ] < 0xFFFF ? s>current_picture.mb_type[top_xy] : 0;

...  ...  
922 
935 
*/

923 
936 
//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)

924 
937 
if(top_type){

925 

h>non_zero_count_cache[4+8*0]= h>non_zero_count[top_xy][4];

926 

h>non_zero_count_cache[5+8*0]= h>non_zero_count[top_xy][5];

927 

h>non_zero_count_cache[6+8*0]= h>non_zero_count[top_xy][6];

928 

h>non_zero_count_cache[7+8*0]= h>non_zero_count[top_xy][3];


938 
*(uint32_t*)&h>non_zero_count_cache[4+8*0]= *(uint32_t*)&h>non_zero_count[top_xy][4+3*8];

929 
939 

930 

h>non_zero_count_cache[1+8*0]= h>non_zero_count[top_xy][9];

931 

h>non_zero_count_cache[2+8*0]= h>non_zero_count[top_xy][8];


940 
h>non_zero_count_cache[1+8*0]= h>non_zero_count[top_xy][1+1*8];


941 
h>non_zero_count_cache[2+8*0]= h>non_zero_count[top_xy][2+1*8];

932 
942 

933 

h>non_zero_count_cache[1+8*3]= h>non_zero_count[top_xy][12];

934 

h>non_zero_count_cache[2+8*3]= h>non_zero_count[top_xy][11];


943 
h>non_zero_count_cache[1+8*3]= h>non_zero_count[top_xy][1+2*8];


944 
h>non_zero_count_cache[2+8*3]= h>non_zero_count[top_xy][2+2*8];

935 
945 

936 
946 
}else{

937 
947 
h>non_zero_count_cache[4+8*0]=

...  ...  
949 
959 

950 
960 
for (i=0; i<2; i++) {

951 
961 
if(left_type[i]){

952 

h>non_zero_count_cache[3+8*1 + 2*8*i]= h>non_zero_count[left_xy[i]][left_block[0+2*i]];

953 

h>non_zero_count_cache[3+8*2 + 2*8*i]= h>non_zero_count[left_xy[i]][left_block[1+2*i]];

954 

h>non_zero_count_cache[0+8*1 + 8*i]= h>non_zero_count[left_xy[i]][left_block[4+2*i]];

955 

h>non_zero_count_cache[0+8*4 + 8*i]= h>non_zero_count[left_xy[i]][left_block[5+2*i]];


962 
h>non_zero_count_cache[3+8*1 + 2*8*i]= h>non_zero_count[left_xy[i]][left_block[8+0+2*i]];


963 
h>non_zero_count_cache[3+8*2 + 2*8*i]= h>non_zero_count[left_xy[i]][left_block[8+1+2*i]];


964 
h>non_zero_count_cache[0+8*1 + 8*i]= h>non_zero_count[left_xy[i]][left_block[8+4+2*i]];


965 
h>non_zero_count_cache[0+8*4 + 8*i]= h>non_zero_count[left_xy[i]][left_block[8+5+2*i]];

956 
966 
}else{

957 
967 
h>non_zero_count_cache[3+8*1 + 2*8*i]=

958 
968 
h>non_zero_count_cache[3+8*2 + 2*8*i]=

...  ...  
1204 
1214 
/**
 * Copy the per-4x4-block non-zero-coefficient counts of the current
 * macroblock from the decoder's scratch cache back into the
 * frame-wide non_zero_count[] table.
 *
 * The cache is an 8-column grid (see the .UU.YYYY layout comment on
 * non_zero_count_cache); the persistent non_zero_count[mb_xy][32]
 * row packs the same 32 values linearly, so the whole macroblock is
 * written with three 64-bit and two 32-bit copies. The index pairs
 * here are the exact mirror of the cache-fill reads done in the
 * for_deblock path, so the two must stay in sync.
 *
 * NOTE(review): the type-punned uint64_t/uint32_t accesses rely on
 * the DECLARE_ALIGNED_8 attribute of non_zero_count_cache for
 * alignment and on the project's tolerance for aliasing casts —
 * confirm against the codebase's portability assumptions.
 */
static inline void write_back_non_zero_count(H264Context *h){
    const int mb_xy= h->mb_xy;

    *((uint64_t*)&h->non_zero_count[mb_xy][ 0]) = *((uint64_t*)&h->non_zero_count_cache[0+8*1]);
    *((uint64_t*)&h->non_zero_count[mb_xy][ 8]) = *((uint64_t*)&h->non_zero_count_cache[0+8*2]);
    *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]);
    *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]);
    *((uint64_t*)&h->non_zero_count[mb_xy][24]) = *((uint64_t*)&h->non_zero_count_cache[0+8*4]);
}

1240 
1223 

1241 
1224 
static inline void write_back_motion(H264Context *h, int mb_type){
