Revision 270a85d2
libavcodec/x86/h264_intrapred.asm
---------------------------------
@@ -138,12 +138,7 @@
     add        r5d, r6d
     lea        r2d, [r2+r5+16]
     shr        r2d, 5
-%ifidn %1, mmx
-    movd       m0, r2d
-    punpcklbw  m0, m0
-    punpcklwd  m0, m0
-    punpckldq  m0, m0
-%elifidn %1, mmxext
+%ifidn %1, mmxext
     movd       m0, r2d
     punpcklbw  m0, m0
     pshufw     m0, m0, 0
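Both the removed plain-MMX branch and the surviving MMXEXT branch splat the same DC byte, already computed in r2d as (top sum + left sum + 16) >> 5, into all eight bytes of an MMX register; the only difference is the splat sequence (three unpacks versus punpcklbw plus one pshufw). With the plain-MMX instantiation gone (see the next hunk), the unpack-only path has no user left. A minimal intrinsics sketch of the two sequences, assuming a compiler that provides the MMX/SSE intrinsic headers; the helper names are illustrative, not from the source:

    #include <mmintrin.h>   /* MMX intrinsics                          */
    #include <xmmintrin.h>  /* _mm_shuffle_pi16 == pshufw (MMXEXT/SSE) */

    /* Splat the low byte of dc into all 8 bytes of an MMX register. */
    static __m64 splat_byte_mmx(int dc)          /* removed path */
    {
        __m64 m = _mm_cvtsi32_si64(dc);
        m = _mm_unpacklo_pi8(m, m);              /* punpcklbw m0, m0 */
        m = _mm_unpacklo_pi16(m, m);             /* punpcklwd m0, m0 */
        return _mm_unpacklo_pi32(m, m);          /* punpckldq m0, m0 */
    }

    static __m64 splat_byte_mmxext(int dc)       /* kept path */
    {
        __m64 m = _mm_cvtsi32_si64(dc);
        m = _mm_unpacklo_pi8(m, m);              /* punpcklbw m0, m0    */
        return _mm_shuffle_pi16(m, 0);           /* pshufw    m0, m0, 0 */
    }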
@@ -185,7 +180,6 @@
 %endmacro

 INIT_MMX
-PRED16x16_DC mmx, movq
 PRED16x16_DC mmxext, movq
 INIT_XMM
 PRED16x16_DC sse, movaps
@@ -337,8 +331,7 @@
 ; void pred8x8_dc_rv40(uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------

-%macro PRED8x8_DC 1
-cglobal pred8x8_dc_rv40_%1, 2,7
+cglobal pred8x8_dc_rv40_mmxext, 2,7
     mov  r4, r0
     sub  r0, r1
     pxor mm0, mm0
@@ -358,16 +351,9 @@
     add  r5d, r6d
     lea  r2d, [r2+r5+8]
     shr  r2d, 4
-%ifidn %1, mmx
-    movd       mm0, r2d
-    punpcklbw  mm0, mm0
-    punpcklwd  mm0, mm0
-    punpckldq  mm0, mm0
-%else
     movd       mm0, r2d
     punpcklbw  mm0, mm0
     pshufw     mm0, mm0, 0
-%endif
     mov  r3d, 4
 .loop:
     movq [r4+r1*0], mm0
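The lea/shr pair above applies the RV40 8x8 DC rounding: the summation code (not shown in this excerpt) accumulates the eight top and eight left neighbours, +8 rounds, and >>4 normalises before the result is splatted and stored row by row in the loop below. A scalar C sketch of the whole routine, with a hypothetical name and the neighbour summation assumed from the surrounding code rather than from this hunk:

    #include <stdint.h>
    #include <string.h>

    static void pred8x8_dc_rv40_ref(uint8_t *src, int stride)
    {
        int i, dc = 8;                        /* lea r2d, [r2+r5+8] */
        for (i = 0; i < 8; i++)
            dc += src[i - stride]             /* top row            */
                + src[i * stride - 1];        /* left column        */
        dc >>= 4;                             /* shr r2d, 4         */
        for (i = 0; i < 8; i++)               /* movq store loop    */
            memset(src + i * stride, dc, 8);
    }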
@@ -376,11 +362,6 @@
     dec   r3d
     jg .loop
     REP_RET
-%endmacro
-
-
-PRED8x8_DC mmx
-PRED8x8_DC mmxext

 ;-----------------------------------------------------------------------------
 ; void pred8x8_tm_vp8(uint8_t *src, int stride)
@@ -484,3 +465,28 @@
     dec   r2d
     jg .loop
     REP_RET
+
+cglobal pred4x4_dc_mmxext, 3,5
+    pxor   mm7, mm7
+    mov    r4, r0
+    sub    r0, r2
+    movd   mm0, [r0]
+    psadbw mm0, mm7
+    movzx  r1d, byte [r0+r2*1-1]
+    movd   r3d, mm0
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*2-1]
+    lea    r0, [r0+r2*2]
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*1-1]
+    add    r3d, r1d
+    movzx  r1d, byte [r0+r2*2-1]
+    add    r3d, r1d
+    add    r3d, 4
+    shr    r3d, 3
+    imul   r3d, 0x01010101
+    mov    [r4+r2*0], r3d
+    mov    [r0+r2*0], r3d
+    mov    [r0+r2*1], r3d
+    mov    [r0+r2*2], r3d
+    RET
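The new routine loads the four top neighbours with movd and sums them in a single psadbw against zero, adds the four left neighbours with movzx, rounds with +4 and shifts by 3, then imul 0x01010101 replicates the resulting byte across a 32-bit word that is stored to each of the four rows. A scalar C sketch of the same computation; the function name is hypothetical, and topright is accepted only to match the pred4x4 signature:

    #include <stdint.h>
    #include <string.h>

    static void pred4x4_dc_ref(uint8_t *src, const uint8_t *topright, int stride)
    {
        int i, dc = 4;                        /* add r3d, 4              */
        uint32_t fill;

        (void)topright;                       /* unused by DC prediction */
        for (i = 0; i < 4; i++)
            dc += src[i - stride]             /* movd + psadbw (top)     */
                + src[i * stride - 1];        /* movzx loads   (left)    */
        fill = (dc >> 3) * 0x01010101u;       /* shr 3; imul 0x01010101  */
        for (i = 0; i < 4; i++)
            memcpy(src + i * stride, &fill, 4);
    }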
libavcodec/x86/h264dsp_mmx.c
----------------------------
@@ -2328,7 +2328,6 @@
 void ff_pred16x16_horizontal_mmx   (uint8_t *src, int stride);
 void ff_pred16x16_horizontal_mmxext(uint8_t *src, int stride);
 void ff_pred16x16_horizontal_ssse3 (uint8_t *src, int stride);
-void ff_pred16x16_dc_mmx           (uint8_t *src, int stride);
 void ff_pred16x16_dc_mmxext        (uint8_t *src, int stride);
 void ff_pred16x16_dc_sse           (uint8_t *src, int stride);
 void ff_pred16x16_dc_sse2          (uint8_t *src, int stride);
@@ -2336,7 +2335,6 @@
 void ff_pred16x16_tm_vp8_mmx       (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_mmxext    (uint8_t *src, int stride);
 void ff_pred16x16_tm_vp8_sse2      (uint8_t *src, int stride);
-void ff_pred8x8_dc_rv40_mmx        (uint8_t *src, int stride);
 void ff_pred8x8_dc_rv40_mmxext     (uint8_t *src, int stride);
 void ff_pred8x8_vertical_mmx       (uint8_t *src, int stride);
 void ff_pred8x8_horizontal_mmx     (uint8_t *src, int stride);
@@ -2346,6 +2344,7 @@
 void ff_pred8x8_tm_vp8_mmxext      (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
+void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);

 #if CONFIG_H264DSP
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
@@ -2354,12 +2353,10 @@
     if (mm_flags & FF_MM_MMX) {
         h->pred16x16[VERT_PRED8x8] = ff_pred16x16_vertical_mmx;
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx;
-        h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmx;
         h->pred8x8 [VERT_PRED8x8] = ff_pred8x8_vertical_mmx;
         h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx;
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
-            h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmx;
             h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
         }
     }
@@ -2368,6 +2365,7 @@
         h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext;
         h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext;
         h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext;
+        h->pred4x4 [DC_PRED ] = ff_pred4x4_dc_mmxext;
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
             h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext;
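For context, pred16x16/pred8x8/pred4x4 are plain function-pointer tables in H264PredContext; the generic C init is assumed to fill every slot with a C implementation first, and this x86 init then only overrides the entries the detected CPU can accelerate. A hedged sketch of a caller dispatching through the tables (helper name and setup are illustrative):

    /* Assumes the usual h264pred.h declarations and an already-initialised
     * context (generic C init plus ff_h264_pred_init_x86()). */
    static void predict_dc(H264PredContext *h, uint8_t *luma16, uint8_t *blk4,
                           const uint8_t *topright, int stride)
    {
        h->pred16x16[DC_PRED8x8](luma16, stride);        /* ff_pred16x16_dc_mmxext on MMXEXT */
        h->pred4x4 [DC_PRED ](blk4, topright, stride);   /* new ff_pred4x4_dc_mmxext         */
    }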