Revision b64dfbb8 libavcodec/i386/h264dsp_mmx.c
libavcodec/i386/h264dsp_mmx.c  

749  749 
"pmullw %3, %%mm6 \n\t"\ 
750  750 
"add %2, %0 \n\t"\ 
751  751 
"punpcklbw %%mm7, "#F" \n\t"\ 
752 
"paddw %4, "#A" \n\t"\ 

752  753 
"paddw "#F", "#A" \n\t"\ 
753  754 
"paddw "#A", %%mm6 \n\t"\ 
754  755 
"movq %%mm6, "#OF"(%1) \n\t" 
...  ...  
895  896 
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\ 
896  897 
\ 
897  898 
: "+a"(src)\ 
898 
: "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ 

899 
: "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\


899  900 
: "memory"\ 
900  901 
);\ 
901  902 
tmp += 4;\ 
...  ...  
903  904 
}\ 
904  905 
tmp = 3*4;\ 
905  906 
asm volatile(\ 
906 
"movq %4, %%mm6 \n\t"\ 

907  907 
"1: \n\t"\ 
908  908 
"movq (%0), %%mm0 \n\t"\ 
909  909 
"paddw 10(%0), %%mm0 \n\t"\ 
...  ...  
916  916 
"psubw %%mm1, %%mm0 \n\t"/*(a-b)/4-b */\ 
917  917 
"paddsw %%mm2, %%mm0 \n\t"\ 
918  918 
"psraw $2, %%mm0 \n\t"/*((a-b)/4-b+c)/4 */\ 
919 
"paddw %%mm6, %%mm2 \n\t"\ 

920 
"paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 +32 */\ 

919 
"paddw %%mm2, %%mm0 \n\t"/*(a-5*b+20*c)/16 */\ 

921  920 
"psraw $6, %%mm0 \n\t"\ 
922  921 
"packuswb %%mm0, %%mm0 \n\t"\ 
923  922 
OP(%%mm0, (%1),%%mm7, d)\ 
...  ...  
926  925 
"decl %2 \n\t"\ 
927  926 
" jnz 1b \n\t"\ 
928  927 
: "+a"(tmp), "+c"(dst), "+m"(h)\ 
929 
: "S"((long)dstStride), "m"(ff_pw_32)\


928 
: "S"((long)dstStride)\ 

930  929 
: "memory"\ 
931  930 
);\ 
932  931 
}\ 
...  ...  
1137  1136 
QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\ 
1138  1137 
QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\ 
1139  1138 
: "+a"(src)\ 
1140 
: "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ 

1139 
: "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\


1141  1140 
: "memory"\ 
1142  1141 
);\ 
1143  1142 
if(size==16){\ 
...  ...  
1151  1150 
QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\ 
1152  1151 
QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\ 
1153  1152 
: "+a"(src)\ 
1154 
: "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\ 

1153 
: "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\


1155  1154 
: "memory"\ 
1156  1155 
);\ 
1157  1156 
}\ 
...  ...  
1163  1162 
do{\ 
1164  1163 
h = size;\ 
1165  1164 
asm volatile(\ 
1166 
"movq %4, %%mm6 \n\t"\ 

1167  1165 
"1: \n\t"\ 
1168  1166 
"movq (%0), %%mm0 \n\t"\ 
1169  1167 
"movq 8(%0), %%mm3 \n\t"\ 
...  ...  
1187  1185 
"paddsw %%mm5, %%mm3 \n\t"\ 
1188  1186 
"psraw $2, %%mm0 \n\t"\ 
1189  1187 
"psraw $2, %%mm3 \n\t"\ 
1190 
"paddw %%mm6, %%mm2 \n\t"\ 

1191 
"paddw %%mm6, %%mm5 \n\t"\ 

1192  1188 
"paddw %%mm2, %%mm0 \n\t"\ 
1193  1189 
"paddw %%mm5, %%mm3 \n\t"\ 
1194  1190 
"psraw $6, %%mm0 \n\t"\ 
...  ...  
1200  1196 
"decl %2 \n\t"\ 
1201  1197 
" jnz 1b \n\t"\ 
1202  1198 
: "+a"(tmp), "+c"(dst), "+m"(h)\ 
1203 
: "S"((long)dstStride), "m"(ff_pw_32)\


1199 
: "S"((long)dstStride)\ 

1204  1200 
: "memory"\ 
1205  1201 
);\ 
1206  1202 
tmp += 8 - size*24;\ 
...  ...  
1246  1242 
static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ 
1247  1243 
{\ 
1248  1244 
asm volatile(\ 
1249 
"movq %5, %%mm6 \n\t"\ 

1250  1245 
"movq (%1), %%mm0 \n\t"\ 
1251  1246 
"movq 24(%1), %%mm1 \n\t"\ 
1252 
"paddw %%mm6, %%mm0 \n\t"\ 

1253 
"paddw %%mm6, %%mm1 \n\t"\ 

1254  1247 
"psraw $5, %%mm0 \n\t"\ 
1255  1248 
"psraw $5, %%mm1 \n\t"\ 
1256  1249 
"packuswb %%mm0, %%mm0 \n\t"\ 
...  ...  
1263  1256 
"lea (%2,%4,2), %2 \n\t"\ 
1264  1257 
"movq 48(%1), %%mm0 \n\t"\ 
1265  1258 
"movq 72(%1), %%mm1 \n\t"\ 
1266 
"paddw %%mm6, %%mm0 \n\t"\ 

1267 
"paddw %%mm6, %%mm1 \n\t"\ 

1268  1259 
"psraw $5, %%mm0 \n\t"\ 
1269  1260 
"psraw $5, %%mm1 \n\t"\ 
1270  1261 
"packuswb %%mm0, %%mm0 \n\t"\ 
...  ...  
1274  1265 
OP(%%mm0, (%2), %%mm4, d)\ 
1275  1266 
OP(%%mm1, (%2,%4), %%mm5, d)\ 
1276  1267 
:"+a"(src8), "+c"(src16), "+d"(dst)\ 
1277 
:"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\


1268 
:"S"((long)src8Stride), "D"((long)dstStride)\ 

1278  1269 
:"memory");\ 
1279  1270 
}\ 
1280  1271 
static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\ 
1281  1272 
{\ 
1282 
asm volatile(\ 

1283 
"movq %0, %%mm6 \n\t"\ 

1284 
::"m"(ff_pw_16)\ 

1285 
);\ 

1286  1273 
while(h){\ 
1287  1274 
asm volatile(\ 
1288  1275 
"movq (%1), %%mm0 \n\t"\ 
1289  1276 
"movq 8(%1), %%mm1 \n\t"\ 
1290 
"paddw %%mm6, %%mm0 \n\t"\ 

1291 
"paddw %%mm6, %%mm1 \n\t"\ 

1292  1277 
"psraw $5, %%mm0 \n\t"\ 
1293  1278 
"psraw $5, %%mm1 \n\t"\ 
1294  1279 
"packuswb %%mm1, %%mm0 \n\t"\ 
Also available in: Unified diff