Revision 8dbe5856 libavcodec/x86/dsputil_mmx.c

View differences:

libavcodec/x86/dsputil_mmx.c
2418 2418
void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
2419 2419
{
2420 2420
    int mm_flags = av_get_cpu_flags();
2421
    const int h264_high_depth = avctx->codec_id == CODEC_ID_H264 && avctx->bits_per_raw_sample > 8;
2421 2422

  
2422 2423
    if (avctx->dsp_mask) {
2423 2424
        if (avctx->dsp_mask & AV_CPU_FLAG_FORCE)
......
2499 2500
        c->put_pixels_clamped = ff_put_pixels_clamped_mmx;
2500 2501
        c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx;
2501 2502
        c->add_pixels_clamped = ff_add_pixels_clamped_mmx;
2503
        if (!h264_high_depth) {
2502 2504
        c->clear_block  = clear_block_mmx;
2503 2505
        c->clear_blocks = clear_blocks_mmx;
2504 2506
        if ((mm_flags & AV_CPU_FLAG_SSE) &&
......
2507 2509
            c->clear_block  = clear_block_sse;
2508 2510
            c->clear_blocks = clear_blocks_sse;
2509 2511
        }
2512
        }
2510 2513

  
2511 2514
#define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
2512 2515
        c->PFX ## _pixels_tab[IDX][0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
......
2514 2517
        c->PFX ## _pixels_tab[IDX][2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
2515 2518
        c->PFX ## _pixels_tab[IDX][3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU
2516 2519

  
2520
        if (!h264_high_depth) {
2517 2521
        SET_HPEL_FUNCS(put, 0, 16, mmx);
2518 2522
        SET_HPEL_FUNCS(put_no_rnd, 0, 16, mmx);
2519 2523
        SET_HPEL_FUNCS(avg, 0, 16, mmx);
......
2522 2526
        SET_HPEL_FUNCS(put_no_rnd, 1, 8, mmx);
2523 2527
        SET_HPEL_FUNCS(avg, 1, 8, mmx);
2524 2528
        SET_HPEL_FUNCS(avg_no_rnd, 1, 8, mmx);
2529
        }
2525 2530

  
2526 2531
#if ARCH_X86_32 || !HAVE_YASM
2527 2532
        c->gmc= gmc_mmx;
2528 2533
#endif
2529 2534
#if ARCH_X86_32 && HAVE_YASM
2535
        if (!h264_high_depth)
2530 2536
        c->emulated_edge_mc = emulated_edge_mc_mmx;
2531 2537
#endif
2532 2538

  
2533 2539
        c->add_bytes= add_bytes_mmx;
2534 2540
        c->add_bytes_l2= add_bytes_l2_mmx;
2535 2541

  
2542
        if (!h264_high_depth)
2536 2543
        c->draw_edges = draw_edges_mmx;
2537 2544

  
2538 2545
        if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
......
2541 2548
        }
2542 2549

  
2543 2550
#if HAVE_YASM
2551
        if (!h264_high_depth) {
2544 2552
        c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2545 2553
        c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2554
        }
2546 2555

  
2547 2556
        c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2548 2557
        c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
......
2551 2560
        if (mm_flags & AV_CPU_FLAG_MMX2) {
2552 2561
            c->prefetch = prefetch_mmx2;
2553 2562

  
2563
            if (!h264_high_depth) {
2554 2564
            c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
2555 2565
            c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
2556 2566

  
......
2564 2574
            c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
2565 2575
            c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
2566 2576
            c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
2577
            }
2567 2578

  
2568 2579
            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
2580
                if (!h264_high_depth) {
2569 2581
                c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
2570 2582
                c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
2571 2583
                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
2572 2584
                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
2573 2585
                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
2574 2586
                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
2587
                }
2575 2588

  
2576 2589
                if (CONFIG_VP3_DECODER && HAVE_YASM) {
2577 2590
                    c->vp3_v_loop_filter= ff_vp3_v_loop_filter_mmx2;
......
2613 2626
            SET_QPEL_FUNCS(avg_qpel, 0, 16, mmx2);
2614 2627
            SET_QPEL_FUNCS(avg_qpel, 1, 8, mmx2);
2615 2628

  
2629
            if (!h264_high_depth) {
2616 2630
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, mmx2);
2617 2631
            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, mmx2);
2618 2632
            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, mmx2);
2619 2633
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, mmx2);
2620 2634
            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, mmx2);
2621 2635
            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmx2);
2636
            }
2622 2637

  
2623 2638
            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, mmx2);
2624 2639
            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, mmx2);
......
2629 2644
            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2630 2645
            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2631 2646

  
2647
            if (!h264_high_depth) {
2632 2648
            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2633 2649
            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2634 2650
            c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
2635 2651
            c->put_h264_chroma_pixels_tab[2]= ff_put_h264_chroma_mc2_mmx2;
2652
            }
2636 2653

  
2637 2654
            c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmx2;
2638 2655
#endif
......
2645 2662
        } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
2646 2663
            c->prefetch = prefetch_3dnow;
2647 2664

  
2665
            if (!h264_high_depth) {
2648 2666
            c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
2649 2667
            c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
2650 2668

  
......
2667 2685
                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
2668 2686
                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
2669 2687
            }
2688
            }
2670 2689

  
2671 2690
            if (CONFIG_VP3_DECODER
2672 2691
                && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
......
2681 2700
            SET_QPEL_FUNCS(avg_qpel, 0, 16, 3dnow);
2682 2701
            SET_QPEL_FUNCS(avg_qpel, 1, 8, 3dnow);
2683 2702

  
2703
            if (!h264_high_depth) {
2684 2704
            SET_QPEL_FUNCS(put_h264_qpel, 0, 16, 3dnow);
2685 2705
            SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 3dnow);
2686 2706
            SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 3dnow);
2687 2707
            SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 3dnow);
2688 2708
            SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 3dnow);
2689 2709
            SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 3dnow);
2710
            }
2690 2711

  
2691 2712
            SET_QPEL_FUNCS(put_2tap_qpel, 0, 16, 3dnow);
2692 2713
            SET_QPEL_FUNCS(put_2tap_qpel, 1, 8, 3dnow);
......
2694 2715
            SET_QPEL_FUNCS(avg_2tap_qpel, 1, 8, 3dnow);
2695 2716

  
2696 2717
#if HAVE_YASM
2718
            if (!h264_high_depth) {
2697 2719
            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2698 2720
            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2721
            }
2699 2722

  
2700 2723
            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2701 2724
            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
......
2710 2733
            c->avg_h264_qpel_pixels_tab[1][x+y*4] = avg_h264_qpel8_mc##x##y##_##CPU;
2711 2734
        if((mm_flags & AV_CPU_FLAG_SSE2) && !(mm_flags & AV_CPU_FLAG_3DNOW)){
2712 2735
            // these functions are slower than mmx on AMD, but faster on Intel
2736
            if (!h264_high_depth) {
2713 2737
            c->put_pixels_tab[0][0] = put_pixels16_sse2;
2714 2738
            c->put_no_rnd_pixels_tab[0][0] = put_pixels16_sse2;
2715 2739
            c->avg_pixels_tab[0][0] = avg_pixels16_sse2;
2716 2740
            H264_QPEL_FUNCS(0, 0, sse2);
2741
            }
2717 2742
        }
2718 2743
        if(mm_flags & AV_CPU_FLAG_SSE2){
2744
            if (!h264_high_depth) {
2719 2745
            H264_QPEL_FUNCS(0, 1, sse2);
2720 2746
            H264_QPEL_FUNCS(0, 2, sse2);
2721 2747
            H264_QPEL_FUNCS(0, 3, sse2);
......
2728 2754
            H264_QPEL_FUNCS(3, 1, sse2);
2729 2755
            H264_QPEL_FUNCS(3, 2, sse2);
2730 2756
            H264_QPEL_FUNCS(3, 3, sse2);
2757
            }
2731 2758
        }
2732 2759
#if HAVE_SSSE3
2733 2760
        if(mm_flags & AV_CPU_FLAG_SSSE3){
2761
            if (!h264_high_depth) {
2734 2762
            H264_QPEL_FUNCS(1, 0, ssse3);
2735 2763
            H264_QPEL_FUNCS(1, 1, ssse3);
2736 2764
            H264_QPEL_FUNCS(1, 2, ssse3);
......
2743 2771
            H264_QPEL_FUNCS(3, 1, ssse3);
2744 2772
            H264_QPEL_FUNCS(3, 2, ssse3);
2745 2773
            H264_QPEL_FUNCS(3, 3, ssse3);
2774
            }
2746 2775
            c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2747 2776
#if HAVE_YASM
2777
            if (!h264_high_depth) {
2748 2778
            c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2749 2779
            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2750 2780
            c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
2751 2781
            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_ssse3;
2782
            }
2752 2783
            c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3;
2753 2784
            if (mm_flags & AV_CPU_FLAG_SSE4) // not really sse4, just slow on Conroe
2754 2785
                c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4;
......
2805 2836
                }
2806 2837
            }
2807 2838

  
2839
            if (!h264_high_depth)
2808 2840
            c->emulated_edge_mc = emulated_edge_mc_sse;
2809 2841
            c->gmc= gmc_sse;
2810 2842
#endif

Also available in: Unified diff