Revision 270a85d2

View differences:

libavcodec/x86/h264_intrapred.asm
138 138
    add       r5d, r6d
139 139
    lea       r2d, [r2+r5+16]
140 140
    shr       r2d, 5
141
%ifidn %1, mmx
142
    movd       m0, r2d
143
    punpcklbw  m0, m0
144
    punpcklwd  m0, m0
145
    punpckldq  m0, m0
146
%elifidn %1, mmxext
141
%ifidn %1, mmxext
147 142
    movd       m0, r2d
148 143
    punpcklbw  m0, m0
149 144
    pshufw     m0, m0, 0
......
185 180
%endmacro
186 181

  
187 182
INIT_MMX
188
PRED16x16_DC    mmx, movq
189 183
PRED16x16_DC mmxext, movq
190 184
INIT_XMM
191 185
PRED16x16_DC    sse, movaps
......
337 331
; void pred8x8_dc_rv40(uint8_t *src, int stride)
338 332
;-----------------------------------------------------------------------------
339 333

  
340
%macro PRED8x8_DC 1
341
cglobal pred8x8_dc_rv40_%1, 2,7
334
cglobal pred8x8_dc_rv40_mmxext, 2,7
342 335
    mov       r4, r0
343 336
    sub       r0, r1
344 337
    pxor      mm0, mm0
......
358 351
    add       r5d, r6d
359 352
    lea       r2d, [r2+r5+8]
360 353
    shr       r2d, 4
361
%ifidn %1, mmx
362
    movd      mm0, r2d
363
    punpcklbw mm0, mm0
364
    punpcklwd mm0, mm0
365
    punpckldq mm0, mm0
366
%else
367 354
    movd      mm0, r2d
368 355
    punpcklbw mm0, mm0
369 356
    pshufw    mm0, mm0, 0
370
%endif
371 357
    mov       r3d, 4
372 358
.loop:
373 359
    movq [r4+r1*0], mm0
......
376 362
    dec   r3d
377 363
    jg .loop
378 364
    REP_RET
379
%endmacro
380

  
381

  
382
PRED8x8_DC mmx
383
PRED8x8_DC mmxext
384 365

  
385 366
;-----------------------------------------------------------------------------
386 367
; void pred8x8_tm_vp8(uint8_t *src, int stride)
......
484 465
    dec         r2d
485 466
    jg .loop
486 467
    REP_RET
468

  
469
cglobal pred4x4_dc_mmxext, 3,5 ; 4x4 DC intra prediction; C prototype is (uint8_t *src, const uint8_t *topright, int stride), so r0 = src, r1 = topright, r2 = stride
470
    pxor   mm7, mm7                 ; mm7 = 0, zero operand for psadbw below
471
    mov     r4, r0                  ; r4 = src, kept for the row-0 store
472
    sub     r0, r2                  ; r0 = src - stride (the row above the block)
473
    movd   mm0, [r0]                ; load the 4 bytes above the block; upper half of mm0 is zeroed
474
    psadbw mm0, mm7                 ; sum of |byte - 0| = sum of the 4 top bytes
475
    movzx  r1d, byte [r0+r2*1-1]    ; byte at src - 1 (left of row 0); r1 is reused as scratch, topright is never read
476
    movd   r3d, mm0                 ; r3d = sum of the top bytes
477
    add    r3d, r1d
478
    movzx  r1d, byte [r0+r2*2-1]    ; byte at src + stride - 1 (left of row 1)
479
    lea     r0, [r0+r2*2]           ; r0 = src + stride (row 1)
480
    add    r3d, r1d
481
    movzx  r1d, byte [r0+r2*1-1]    ; byte at src + 2*stride - 1 (left of row 2)
482
    add    r3d, r1d
483
    movzx  r1d, byte [r0+r2*2-1]    ; byte at src + 3*stride - 1 (left of row 3)
484
    add    r3d, r1d                 ; r3d = sum of all 8 neighbouring bytes
485
    add    r3d, 4                   ; rounding term for the divide by 8
486
    shr    r3d, 3                   ; r3d = (sum + 4) >> 3, the DC value
487
    imul   r3d, 0x01010101          ; replicate the DC byte into all 4 bytes of the dword
488
    mov   [r4+r2*0], r3d            ; store row 0
489
    mov   [r0+r2*0], r3d            ; store row 1
490
    mov   [r0+r2*1], r3d            ; store row 2
491
    mov   [r0+r2*2], r3d            ; store row 3
492
    RET
libavcodec/x86/h264dsp_mmx.c
2328 2328
void ff_pred16x16_horizontal_mmx   (uint8_t *src, int stride);
2329 2329
void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
2330 2330
void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
2331
void ff_pred16x16_dc_mmx           (uint8_t *src, int stride);
2332 2331
void ff_pred16x16_dc_mmxext        (uint8_t *src, int stride);
2333 2332
void ff_pred16x16_dc_sse           (uint8_t *src, int stride);
2334 2333
void ff_pred16x16_dc_sse2          (uint8_t *src, int stride);
......
2336 2335
void ff_pred16x16_tm_vp8_mmx       (uint8_t *src, int stride);
2337 2336
void ff_pred16x16_tm_vp8_mmxext    (uint8_t *src, int stride);
2338 2337
void ff_pred16x16_tm_vp8_sse2      (uint8_t *src, int stride);
2339
void ff_pred8x8_dc_rv40_mmx        (uint8_t *src, int stride);
2340 2338
void ff_pred8x8_dc_rv40_mmxext     (uint8_t *src, int stride);
2341 2339
void ff_pred8x8_vertical_mmx       (uint8_t *src, int stride);
2342 2340
void ff_pred8x8_horizontal_mmx     (uint8_t *src, int stride);
......
2346 2344
void ff_pred8x8_tm_vp8_mmxext      (uint8_t *src, int stride);
2347 2345
void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
2348 2346
void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
2347
void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
2349 2348

  
2350 2349
#if CONFIG_H264DSP
2351 2350
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
......
2354 2353
    if (mm_flags & FF_MM_MMX) {
2355 2354
        h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
2356 2355
        h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
2357
        h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmx;
2358 2356
        h->pred8x8  [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
2359 2357
        h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
2360 2358
        if (codec_id == CODEC_ID_VP8) {
2361 2359
            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
2362
            h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmx;
2363 2360
            h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
2364 2361
        }
2365 2362
    }
......
2368 2365
        h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
2369 2366
        h->pred16x16[DC_PRED8x8  ] = ff_pred16x16_dc_mmxext;
2370 2367
        h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
2368
        h->pred4x4  [DC_PRED     ] = ff_pred4x4_dc_mmxext;
2371 2369
        if (codec_id == CODEC_ID_VP8) {
2372 2370
            h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
2373 2371
            h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmxext;

Also available in: Unified diff