Revision 84705403

View differences:

libavcodec/dsputil.c
2526 2526
        dst[i+0] = src1[i+0]-src2[i+0];
2527 2527
}
2528 2528

  
2529
/**
 * Scalar implementation of huffyuv's median-prediction residual.
 *
 * For each i in [0, w) the predictor is
 *     mid_pred(L, T, (L + T - LT) & 0xFF)
 * where T = src1[i], L is the previous src2 sample and LT is the previous
 * src1 sample; the residual src2[i] - pred is stored in dst[i] (truncated to
 * 8 bits by the uint8_t store).
 *
 * @param dst      receives w residual bytes
 * @param src1     row supplying the T / LT samples
 * @param src2     row supplying the X / L samples (the row being predicted)
 * @param w        number of samples to process
 * @param left     in: initial L value; out: last src2 sample processed
 * @param left_top in: initial LT value; out: last src1 sample processed
 *
 * NOTE(review): mid_pred() is defined elsewhere; presumably it returns the
 * median of its three arguments -- confirm.
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
2530
    int i;
2531
    uint8_t l, lt;
2532

  
2533
    /* the incoming int values are narrowed to 8 bits here */
    l= *left;
2534
    lt= *left_top;
2535

  
2536
    for(i=0; i<w; i++){
2537
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
2538
        /* current samples become LT and L for the next position */
        lt= src1[i];
2539
        l= src2[i];
2540
        dst[i]= l - pred;
2541
    }    
2542

  
2543
    /* hand the running state back; if w <= 0 these are just the (8 bit) inputs */
    *left= l;
2544
    *left_top= lt;
2545
}
2546

  
2529 2547
/* Two-output butterfly: o1 = i1 + i2, o2 = i1 - i2.
 * Expands to two separate statements (no do/while wrapper), and o1 is
 * assigned before o2 is evaluated, so o1 must not alias i1 or i2. */
#define BUTTERFLY2(o1,o2,i1,i2) \
2530 2548
o1= (i1)+(i2);\
2531 2549
o2= (i1)-(i2);
......
3007 3025
        
3008 3026
    c->add_bytes= add_bytes_c;
3009 3027
    c->diff_bytes= diff_bytes_c;
3028
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
3010 3029
    c->bswap_buf= bswap_buf;
3011 3030

  
3012 3031
#ifdef HAVE_MMX
libavcodec/dsputil.h
234 234
    /* huffyuv specific */
235 235
    void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
236 236
    void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
237
    /**
238
     * subtract huffyuv's variant of median prediction
239
     * note, this might read from src1[-1], src2[-1]
240
     */
241
    void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top);
237 242
    void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w);
238 243
    
239 244
    /* (I)DCT */
libavcodec/huffyuv.c
153 153
    *left_top= lt;
154 154
}
155 155

  
156
//FIXME optimize
157
/**
 * Compute huffyuv median-prediction residuals for one row.
 *
 * For each i in [0, w): pred = mid_pred(L, T, (L + T - LT) & 0xFF) with
 * T = src1[i], L = previous src2 sample, LT = previous src1 sample, and
 * dst[i] = src2[i] - pred (truncated to 8 bits by the uint8_t store).
 *
 * @param dst      receives w residual bytes
 * @param src1     row supplying the T / LT samples
 * @param src2     row supplying the X / L samples
 * @param w        number of samples to process
 * @param left     in: initial L value; out: last src2 sample processed
 * @param left_top in: initial LT value; out: last src1 sample processed
 *
 * NOTE(review): mid_pred() is defined elsewhere; presumably the median of
 * its three arguments -- confirm.
 */
static inline void sub_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
158
    int i;
159
    uint8_t l, lt;
160

  
161
    /* the incoming int values are narrowed to 8 bits here */
    l= *left;
162
    lt= *left_top;
163

  
164
    for(i=0; i<w; i++){
165
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
166
        /* current samples become LT and L for the next position */
        lt= src1[i];
167
        l= src2[i];
168
        dst[i]= l - pred;
169
    }    
170

  
171
    /* write the running state back for the caller's next call */
    *left= l;
172
    *left_top= lt;
173
}
174

  
175 156
static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){
176 157
    int i;
177 158
    int r,g,b;
......
999 980
            lefttopy= p->data[0][3];
1000 981
            lefttopu= p->data[1][1];
1001 982
            lefttopv= p->data[2][1];
1002
            sub_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy);
1003
            sub_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu);
1004
            sub_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv);
983
            s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy);
984
            s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu);
985
            s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv);
1005 986
            encode_422_bitstream(s, width-4);
1006 987
            y++; cy++;
1007 988

  
......
1011 992
                if(s->bitstream_bpp==12){
1012 993
                    while(2*cy > y){
1013 994
                        ydst= p->data[0] + p->linesize[0]*y;
1014
                        sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
995
                        s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
1015 996
                        encode_gray_bitstream(s, width);
1016 997
                        y++;
1017 998
                    }
......
1021 1002
                udst= p->data[1] + p->linesize[1]*cy;
1022 1003
                vdst= p->data[2] + p->linesize[2]*cy;
1023 1004

  
1024
                sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
1025
                sub_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu);
1026
                sub_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv);
1005
                s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy);
1006
                s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu);
1007
                s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv);
1027 1008

  
1028 1009
                encode_422_bitstream(s, width);
1029 1010
            }
libavcodec/i386/dsputil_mmx.c
583 583
    for(; i<w; i++)
584 584
        dst[i+0] = src1[i+0]-src2[i+0];
585 585
}
586

  
587
/**
 * MMX2 implementation of huffyuv's median-prediction residual.
 *
 * The asm loop handles 8 samples per iteration. For every position it loads
 * LT = src1[i-1], T = src1[i], L = src2[i-1], X = src2[i], forms the byte-wise
 * (wrapping) gradient L + T - LT, clamps it between min(T, L) and max(T, L)
 * (i.e. the median of L, T and the gradient) and stores X - pred to dst[i].
 *
 * Because the vector path takes L and LT for position 0 from src2[-1] and
 * src1[-1], dst[0] is recomputed afterwards in C from *left / *left_top.
 *
 * @param dst      receives the residual bytes
 * @param src1     row supplying T / LT; src1[-1] is read
 * @param src2     row supplying X / L;  src2[-1] is read
 * @param w        number of samples
 * @param left     in: L for position 0; out: src2[w-1]
 * @param left_top in: LT for position 0; out: src1[w-1]
 *
 * NOTE(review): the loop advances i by 8 and stores a full 8 bytes each time,
 * so when w is not a multiple of 8 the last iteration reads src1/src2 and
 * writes dst beyond index w-1 -- callers presumably provide padded buffers;
 * confirm.
 * NOTE(review): the loop condition is tested at the bottom (unsigned compare
 * via jb), so one iteration runs even if w <= 0, and src1[w-1] / src2[w-1]
 * are then read out of range.
 * NOTE(review): the asm statement declares no clobbers (no "memory", no mm
 * registers) although it writes through dst and uses mm0-mm5; the following
 * dst[0] store relies on not being reordered before the asm -- verify.
 */
static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
588
    int i=0;
589
    uint8_t l, lt;
590

  
591
    /* operands: %0 = i (byte offset), %1 = src1, %2 = src2, %3 = dst, %4 = w */
    asm volatile(
592
        "1:				\n\t"
593
        "movq  -1(%1, %0), %%mm0	\n\t" // LT
594
        "movq  (%1, %0), %%mm1		\n\t" // T
595
        "movq  -1(%2, %0), %%mm2	\n\t" // L
596
        "movq  (%2, %0), %%mm3		\n\t" // X
597
        "movq %%mm2, %%mm4		\n\t" // L
598
        "psubb %%mm0, %%mm2		\n\t"
599
        "paddb %%mm1, %%mm2		\n\t" // L + T - LT
600
        "movq %%mm4, %%mm5		\n\t" // L
601
        "pmaxub %%mm1, %%mm4		\n\t" // max(T, L)
602
        "pminub %%mm5, %%mm1		\n\t" // min(T, L)
603
        "pminub %%mm2, %%mm4		\n\t" 
604
        "pmaxub %%mm1, %%mm4		\n\t"
605
        "psubb %%mm4, %%mm3		\n\t" // dst - pred
606
        "movq %%mm3, (%3, %0)		\n\t"
607
        "addl $8, %0			\n\t"
608
        "cmpl %4, %0			\n\t"
609
        " jb 1b				\n\t"
610
        : "+r" (i)
611
        : "r"(src1), "r"(src2), "r"(dst), "r"(w)
612
    );
613

  
614
    /* the incoming int values are narrowed to 8 bits here */
    l= *left;
615
    lt= *left_top;
616
    
617
    /* redo position 0 with the caller-supplied left / left-top neighbours */
    dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
618
    
619
    /* last samples of this row become the state for the next call */
    *left_top= src1[w-1];
620
    *left    = src2[w-1];
621
}
622

  
586 623
#define LBUTTERFLY2(a1,b1,a2,b2)\
587 624
    "paddw " #b1 ", " #a1 "		\n\t"\
588 625
    "paddw " #b2 ", " #a2 "		\n\t"\
......
1699 1736
            SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2)
1700 1737
            SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2)
1701 1738
#endif
1739

  
1740
            c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2;
1702 1741
        } else if (mm_flags & MM_3DNOW) {
1703 1742
            c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
1704 1743
            c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;

Also available in: Unified diff