Revision b64dfbb8 libavcodec/i386/h264dsp_mmx.c

View differences:

libavcodec/i386/h264dsp_mmx.c
749 749
        "pmullw %3, %%mm6           \n\t"\
750 750
        "add %2, %0                 \n\t"\
751 751
        "punpcklbw %%mm7, "#F"      \n\t"\
752
        "paddw %4, "#A"             \n\t"\
752 753
        "paddw "#F", "#A"           \n\t"\
753 754
        "paddw "#A", %%mm6          \n\t"\
754 755
        "movq %%mm6, "#OF"(%1)      \n\t"
......
895 896
            QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 3*8*3)\
896 897
             \
897 898
            : "+a"(src)\
898
            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
899
            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
899 900
            : "memory"\
900 901
        );\
901 902
        tmp += 4;\
......
903 904
    }\
904 905
    tmp -= 3*4;\
905 906
    asm volatile(\
906
        "movq %4, %%mm6             \n\t"\
907 907
        "1:                         \n\t"\
908 908
        "movq     (%0), %%mm0       \n\t"\
909 909
        "paddw  10(%0), %%mm0       \n\t"\
......
916 916
        "psubw %%mm1, %%mm0         \n\t"/*(a-b)/4-b */\
917 917
        "paddsw %%mm2, %%mm0        \n\t"\
918 918
        "psraw $2, %%mm0            \n\t"/*((a-b)/4-b+c)/4 */\
919
        "paddw %%mm6, %%mm2         \n\t"\
920
        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 +32 */\
919
        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 */\
921 920
        "psraw $6, %%mm0            \n\t"\
922 921
        "packuswb %%mm0, %%mm0      \n\t"\
923 922
        OP(%%mm0, (%1),%%mm7, d)\
......
926 925
        "decl %2                    \n\t"\
927 926
        " jnz 1b                    \n\t"\
928 927
        : "+a"(tmp), "+c"(dst), "+m"(h)\
929
        : "S"((long)dstStride), "m"(ff_pw_32)\
928
        : "S"((long)dstStride)\
930 929
        : "memory"\
931 930
    );\
932 931
}\
......
1137 1136
            QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 6*48)\
1138 1137
            QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 7*48)\
1139 1138
            : "+a"(src)\
1140
            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
1139
            : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
1141 1140
            : "memory"\
1142 1141
        );\
1143 1142
        if(size==16){\
......
1151 1150
                QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 14*48)\
1152 1151
                QPEL_H264HV(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, 15*48)\
1153 1152
                : "+a"(src)\
1154
                : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5)\
1153
                : "c"(tmp), "S"((long)srcStride), "m"(ff_pw_5), "m"(ff_pw_16)\
1155 1154
                : "memory"\
1156 1155
            );\
1157 1156
        }\
......
1163 1162
    do{\
1164 1163
    h = size;\
1165 1164
    asm volatile(\
1166
        "movq %4, %%mm6             \n\t"\
1167 1165
        "1:                         \n\t"\
1168 1166
        "movq     (%0), %%mm0       \n\t"\
1169 1167
        "movq    8(%0), %%mm3       \n\t"\
......
1187 1185
        "paddsw %%mm5, %%mm3        \n\t"\
1188 1186
        "psraw $2, %%mm0            \n\t"\
1189 1187
        "psraw $2, %%mm3            \n\t"\
1190
        "paddw %%mm6, %%mm2         \n\t"\
1191
        "paddw %%mm6, %%mm5         \n\t"\
1192 1188
        "paddw %%mm2, %%mm0         \n\t"\
1193 1189
        "paddw %%mm5, %%mm3         \n\t"\
1194 1190
        "psraw $6, %%mm0            \n\t"\
......
1200 1196
        "decl %2                    \n\t"\
1201 1197
        " jnz 1b                    \n\t"\
1202 1198
        : "+a"(tmp), "+c"(dst), "+m"(h)\
1203
        : "S"((long)dstStride), "m"(ff_pw_32)\
1199
        : "S"((long)dstStride)\
1204 1200
        : "memory"\
1205 1201
    );\
1206 1202
    tmp += 8 - size*24;\
......
1246 1242
static av_noinline void OPNAME ## pixels4_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
1247 1243
{\
1248 1244
    asm volatile(\
1249
        "movq       %5,  %%mm6          \n\t"\
1250 1245
        "movq      (%1), %%mm0          \n\t"\
1251 1246
        "movq    24(%1), %%mm1          \n\t"\
1252
        "paddw    %%mm6, %%mm0          \n\t"\
1253
        "paddw    %%mm6, %%mm1          \n\t"\
1254 1247
        "psraw      $5,  %%mm0          \n\t"\
1255 1248
        "psraw      $5,  %%mm1          \n\t"\
1256 1249
        "packuswb %%mm0, %%mm0          \n\t"\
......
1263 1256
        "lea  (%2,%4,2), %2             \n\t"\
1264 1257
        "movq    48(%1), %%mm0          \n\t"\
1265 1258
        "movq    72(%1), %%mm1          \n\t"\
1266
        "paddw    %%mm6, %%mm0          \n\t"\
1267
        "paddw    %%mm6, %%mm1          \n\t"\
1268 1259
        "psraw      $5,  %%mm0          \n\t"\
1269 1260
        "psraw      $5,  %%mm1          \n\t"\
1270 1261
        "packuswb %%mm0, %%mm0          \n\t"\
......
1274 1265
        OP(%%mm0, (%2),    %%mm4, d)\
1275 1266
        OP(%%mm1, (%2,%4), %%mm5, d)\
1276 1267
        :"+a"(src8), "+c"(src16), "+d"(dst)\
1277
        :"S"((long)src8Stride), "D"((long)dstStride), "m"(ff_pw_16)\
1268
        :"S"((long)src8Stride), "D"((long)dstStride)\
1278 1269
        :"memory");\
1279 1270
}\
1280 1271
static av_noinline void OPNAME ## pixels8_l2_shift5_ ## MMX(uint8_t *dst, int16_t *src16, uint8_t *src8, int dstStride, int src8Stride, int h)\
1281 1272
{\
1282
    asm volatile(\
1283
        "movq       %0,  %%mm6          \n\t"\
1284
        ::"m"(ff_pw_16)\
1285
        );\
1286 1273
    while(h--){\
1287 1274
    asm volatile(\
1288 1275
        "movq      (%1), %%mm0          \n\t"\
1289 1276
        "movq     8(%1), %%mm1          \n\t"\
1290
        "paddw    %%mm6, %%mm0          \n\t"\
1291
        "paddw    %%mm6, %%mm1          \n\t"\
1292 1277
        "psraw      $5,  %%mm0          \n\t"\
1293 1278
        "psraw      $5,  %%mm1          \n\t"\
1294 1279
        "packuswb %%mm1, %%mm0          \n\t"\

Also available in: Unified diff