Revision 9f3d6ca4 libavcodec/x86/h264dsp_mmx.c

View differences:

libavcodec/x86/h264dsp_mmx.c
218 218
    );
219 219
}
220 220

  
221
#define LF_FUNC(DIR, TYPE, OPT) \
222
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
223
                                                  int alpha, int beta, int8_t *tc0);
224
#define LF_IFUNC(DIR, TYPE, OPT) \
225
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
226
                                                  int alpha, int beta);
227

  
228
LF_FUNC (h,  chroma,       mmxext)
229
LF_IFUNC(h,  chroma_intra, mmxext)
230
LF_FUNC (v,  chroma,       mmxext)
231
LF_IFUNC(v,  chroma_intra, mmxext)
232

  
233
LF_FUNC (h,  luma,         mmxext)
234
LF_IFUNC(h,  luma_intra,   mmxext)
235
#if HAVE_YASM && ARCH_X86_32
236
LF_FUNC (v8, luma,         mmxext)
237
static void ff_deblock_v_luma_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
221
#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \
222
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
223
                                                                int alpha, int beta, int8_t *tc0);
224
#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \
225
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \
226
                                                                int alpha, int beta);
227

  
228
#define LF_FUNCS(type, depth)\
229
LF_FUNC (h,  chroma,       depth, mmxext)\
230
LF_IFUNC(h,  chroma_intra, depth, mmxext)\
231
LF_FUNC (v,  chroma,       depth, mmxext)\
232
LF_IFUNC(v,  chroma_intra, depth, mmxext)\
233
LF_FUNC (h,  luma,         depth, mmxext)\
234
LF_IFUNC(h,  luma_intra,   depth, mmxext)\
235
LF_FUNC (h,  luma,         depth, sse2)\
236
LF_IFUNC(h,  luma_intra,   depth, sse2)\
237
LF_FUNC (v,  luma,         depth, sse2)\
238
LF_IFUNC(v,  luma_intra,   depth, sse2)\
239
LF_FUNC (h,  luma,         depth,  avx)\
240
LF_IFUNC(h,  luma_intra,   depth,  avx)\
241
LF_FUNC (v,  luma,         depth,  avx)\
242
LF_IFUNC(v,  luma_intra,   depth,  avx)
243

  
244
LF_FUNCS( uint8_t,  8)
245
LF_FUNCS(uint16_t, 10)
246

  
247
LF_FUNC (v8, luma,             8, mmxext)
248
static void ff_deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0)
238 249
{
239 250
    if((tc0[0] & tc0[1]) >= 0)
240
        ff_deblock_v8_luma_mmxext(pix+0, stride, alpha, beta, tc0);
251
        ff_deblock_v8_luma_8_mmxext(pix+0, stride, alpha, beta, tc0);
241 252
    if((tc0[2] & tc0[3]) >= 0)
242
        ff_deblock_v8_luma_mmxext(pix+8, stride, alpha, beta, tc0+2);
253
        ff_deblock_v8_luma_8_mmxext(pix+8, stride, alpha, beta, tc0+2);
243 254
}
244
LF_IFUNC(v8, luma_intra,   mmxext)
245
static void ff_deblock_v_luma_intra_mmxext(uint8_t *pix, int stride, int alpha, int beta)
255
LF_IFUNC(v8, luma_intra,        8, mmxext)
256
static void ff_deblock_v_luma_intra_8_mmxext(uint8_t *pix, int stride, int alpha, int beta)
246 257
{
247
    ff_deblock_v8_luma_intra_mmxext(pix+0, stride, alpha, beta);
248
    ff_deblock_v8_luma_intra_mmxext(pix+8, stride, alpha, beta);
258
    ff_deblock_v8_luma_intra_8_mmxext(pix+0, stride, alpha, beta);
259
    ff_deblock_v8_luma_intra_8_mmxext(pix+8, stride, alpha, beta);
249 260
}
250
#endif
251 261

  
252
LF_FUNC (h,  luma,         sse2)
253
LF_IFUNC(h,  luma_intra,   sse2)
254
LF_FUNC (v,  luma,         sse2)
255
LF_IFUNC(v,  luma_intra,   sse2)
256
LF_FUNC (h,  luma,         avx)
257
LF_IFUNC(h,  luma_intra,   avx)
258
LF_FUNC (v,  luma,         avx)
259
LF_IFUNC(v,  luma_intra,   avx)
262
LF_FUNC (v,  luma,            10, mmxext)
263
LF_IFUNC(v,  luma_intra,      10, mmxext)
260 264

  
261 265
/***********************************/
262 266
/* weighted prediction */
......
318 322
            c->h264_idct_add8      = ff_h264_idct_add8_mmx2;
319 323
            c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2;
320 324

  
321
            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_mmxext;
322
            c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_mmxext;
323
            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_mmxext;
324
            c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_mmxext;
325
            c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
326
            c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
327
            c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext;
328
            c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext;
325 329
#if ARCH_X86_32
326
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_mmxext;
327
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_mmxext;
328
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_mmxext;
329
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_mmxext;
330
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext;
331
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext;
332
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmxext;
333
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmxext;
330 334
#endif
331 335
            c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2;
332 336
            c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2;
......
364 368
                c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_sse2;
365 369

  
366 370
#if HAVE_ALIGNED_STACK
367
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_sse2;
368
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_sse2;
369
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_sse2;
370
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_sse2;
371
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_sse2;
372
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_sse2;
373
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
374
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
371 375
#endif
372 376

  
373 377
                c->h264_idct_add16 = ff_h264_idct_add16_sse2;
......
383 387
            }
384 388
            if (mm_flags&AV_CPU_FLAG_AVX) {
385 389
#if HAVE_ALIGNED_STACK
386
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_avx;
387
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_avx;
388
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_avx;
389
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_avx;
390
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_8_avx;
391
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_8_avx;
392
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
393
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
394
#endif
395
            }
396
        }
397
    }
398
#endif
399
    } else if (bit_depth == 10) {
400
#if HAVE_YASM
401
    if (mm_flags & AV_CPU_FLAG_MMX) {
402
        if (mm_flags & AV_CPU_FLAG_MMX2) {
403
#if ARCH_X86_32
404
            c->h264_v_loop_filter_luma= ff_deblock_v_luma_10_mmxext;
405
            c->h264_h_loop_filter_luma= ff_deblock_h_luma_10_mmxext;
406
            c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_mmxext;
407
            c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_mmxext;
408
#endif
409
            if (mm_flags&AV_CPU_FLAG_SSE2) {
410
#if HAVE_ALIGNED_STACK
411
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_sse2;
412
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_sse2;
413
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
414
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
415
#endif
416
            }
417
            if (mm_flags&AV_CPU_FLAG_AVX) {
418
#if HAVE_ALIGNED_STACK
419
                c->h264_v_loop_filter_luma = ff_deblock_v_luma_10_avx;
420
                c->h264_h_loop_filter_luma = ff_deblock_h_luma_10_avx;
421
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
422
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
390 423
#endif
391 424
            }
392 425
        }

Also available in: Unified diff