Revision 12802ec0

View differences:

libavcodec/dsputil.c
1600 1600
#undef op_avg
1601 1601
#undef op_put
1602 1602

  
1603
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
1604
    const int A=(8-x)*(8-y);
1605
    const int B=(  x)*(8-y);
1606
    const int C=(8-x)*(  y);
1607
    const int D=(  x)*(  y);
1608
    int i;
1609

  
1610
    assert(x<8 && y<8 && x>=0 && y>=0);
1611

  
1612
    for(i=0; i<h; i++)
1613
    {
1614
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
1615
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
1616
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
1617
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
1618
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
1619
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
1620
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
1621
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
1622
        dst+= stride;
1623
        src+= stride;
1624
    }
1625
}
1626

  
1627
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
1628
    const int A=(8-x)*(8-y);
1629
    const int B=(  x)*(8-y);
1630
    const int C=(8-x)*(  y);
1631
    const int D=(  x)*(  y);
1632
    int i;
1633

  
1634
    assert(x<8 && y<8 && x>=0 && y>=0);
1635

  
1636
    for(i=0; i<h; i++)
1637
    {
1638
        dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
1639
        dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
1640
        dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
1641
        dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
1642
        dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
1643
        dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
1644
        dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
1645
        dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
1646
        dst+= stride;
1647
        src+= stride;
1648
    }
1649
}
1650

  
1651 1603
#define QPEL_MC(r, OPNAME, RND, OP) \
1652 1604
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1653 1605
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
......
4301 4253
    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_c;
4302 4254
    c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_c;
4303 4255
    c->avg_h264_chroma_pixels_tab[2]= avg_h264_chroma_mc2_c;
4304
    c->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
4305
    c->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
4306 4256

  
4307 4257
    c->draw_edges = draw_edges_c;
4308 4258

  
4309 4259
#if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
4310 4260
    ff_mlp_init(c, avctx);
4311 4261
#endif
4312
#if CONFIG_VC1_DECODER
4313
    ff_vc1dsp_init(c,avctx);
4314
#endif
4315 4262
#if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
4316 4263
    ff_intrax8dsp_init(c,avctx);
4317 4264
#endif
libavcodec/dsputil.h
341 341
     */
342 342
    h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
343 343
    h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
344
    /* This is really one func used in VC-1 decoding */
345
    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
346
    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
347 344

  
348 345
    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
349 346
    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
......
503 500
                               unsigned int filter_shift, int32_t mask, int blocksize,
504 501
                               int32_t *sample_buffer);
505 502

  
506
    /* vc1 functions */
507
    void (*vc1_inv_trans_8x8)(DCTELEM *b);
508
    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
509
    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
510
    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
511
    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
512
    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
513
    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
514
    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
515
    void (*vc1_v_overlap)(uint8_t* src, int stride);
516
    void (*vc1_h_overlap)(uint8_t* src, int stride);
517
    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
518
    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
519
    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
520
    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
521
    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
522
    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
523
    /* put 8x8 block with bicubic interpolation and quarterpel precision
524
     * last argument is actually round value instead of height
525
     */
526
    op_pixels_func put_vc1_mspel_pixels_tab[16];
527
    op_pixels_func avg_vc1_mspel_pixels_tab[16];
528

  
529 503
    /* intrax8 functions */
530 504
    void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
531 505
    void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
......
629 603
void ff_dsputil_init_dwt(DSPContext *c);
630 604
void ff_rv30dsp_init(DSPContext* c, AVCodecContext *avctx);
631 605
void ff_rv40dsp_init(DSPContext* c, AVCodecContext *avctx);
632
void ff_vc1dsp_init(DSPContext* c, AVCodecContext *avctx);
633 606
void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
634 607
void ff_mlp_init(DSPContext* c, AVCodecContext *avctx);
635 608
void ff_mlp_init_x86(DSPContext* c, AVCodecContext *avctx);
libavcodec/ppc/dsputil_altivec.h
43 43
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
44 44

  
45 45
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
46
void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
47 46
void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
48 47
void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
49 48

  
libavcodec/ppc/dsputil_ppc.c
171 171

  
172 172
    if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
173 173
        dsputil_init_altivec(c, avctx);
174
        if(CONFIG_VC1_DECODER)
175
            vc1dsp_init_altivec(c, avctx);
176 174
        float_init_altivec(c, avctx);
177 175
        int_init_altivec(c, avctx);
178 176
        c->gmc1 = gmc1_altivec;
libavcodec/ppc/vc1dsp_altivec.c
322 322
}
323 323

  
324 324

  
325
void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) {
325
void ff_vc1dsp_init_altivec(VC1DSPContext* dsp)
326
{
327
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
328
        return;
329

  
326 330
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
327 331
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
328 332
}
libavcodec/vc1.c
337 337
    v->res_fasttx = get_bits1(gb);
338 338
    if (!v->res_fasttx)
339 339
    {
340
        v->s.dsp.vc1_inv_trans_8x8 = ff_simple_idct;
341
        v->s.dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
342
        v->s.dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
343
        v->s.dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
344
        v->s.dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add;
345
        v->s.dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
346
        v->s.dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
347
        v->s.dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
340
        v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
341
        v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
342
        v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
343
        v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
344
        v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add;
345
        v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
346
        v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
347
        v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
348 348
    }
349 349

  
350 350
    v->fastuvmc =  get_bits1(gb); //common
libavcodec/vc1.h
26 26
#include "avcodec.h"
27 27
#include "mpegvideo.h"
28 28
#include "intrax8.h"
29
#include "vc1dsp.h"
29 30

  
30 31
/** Markers used in VC-1 AP frame data */
31 32
//@{
......
155 156
typedef struct VC1Context{
156 157
    MpegEncContext s;
157 158
    IntraX8Context x8;
159
    VC1DSPContext vc1dsp;
158 160

  
159 161
    int bits;
160 162

  
libavcodec/vc1dec.c
160 160

  
161 161
/** @} */ //Bitplane group
162 162

  
163
static void vc1_loop_filter_iblk(MpegEncContext *s, int pq)
163
static void vc1_loop_filter_iblk(VC1Context *v, int pq)
164 164
{
165
    MpegEncContext *s = &v->s;
165 166
    int j;
166 167
    if (!s->first_slice_line) {
167
        s->dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
168
        v->vc1dsp.vc1_v_loop_filter16(s->dest[0], s->linesize, pq);
168 169
        if (s->mb_x)
169
            s->dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize, s->linesize, pq);
170
        s->dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize+8, s->linesize, pq);
170
            v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize, s->linesize, pq);
171
        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] - 16*s->linesize+8, s->linesize, pq);
171 172
        for(j = 0; j < 2; j++){
172
            s->dsp.vc1_v_loop_filter8(s->dest[j+1], s->uvlinesize, pq);
173
            v->vc1dsp.vc1_v_loop_filter8(s->dest[j+1], s->uvlinesize, pq);
173 174
            if (s->mb_x)
174
                s->dsp.vc1_h_loop_filter8(s->dest[j+1]-8*s->uvlinesize, s->uvlinesize, pq);
175
                v->vc1dsp.vc1_h_loop_filter8(s->dest[j+1]-8*s->uvlinesize, s->uvlinesize, pq);
175 176
        }
176 177
    }
177
    s->dsp.vc1_v_loop_filter16(s->dest[0] + 8*s->linesize, s->linesize, pq);
178
    v->vc1dsp.vc1_v_loop_filter16(s->dest[0] + 8*s->linesize, s->linesize, pq);
178 179

  
179 180
    if (s->mb_y == s->mb_height-1) {
180 181
        if (s->mb_x) {
181
            s->dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
182
            s->dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
183
            s->dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
182
            v->vc1dsp.vc1_h_loop_filter16(s->dest[0], s->linesize, pq);
183
            v->vc1dsp.vc1_h_loop_filter8(s->dest[1], s->uvlinesize, pq);
184
            v->vc1dsp.vc1_h_loop_filter8(s->dest[2], s->uvlinesize, pq);
184 185
        }
185
        s->dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
186
        v->vc1dsp.vc1_h_loop_filter16(s->dest[0] + 8, s->linesize, pq);
186 187
    }
187 188
}
188 189

  
......
342 343

  
343 344
    if(s->mspel) {
344 345
        dxy = ((my & 3) << 2) | (mx & 3);
345
        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
346
        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
346
        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
347
        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
347 348
        srcY += s->linesize * 8;
348
        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
349
        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
349
        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
350
        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
350 351
    } else { // hpel mc - always used for luma
351 352
        dxy = (my & 2) | ((mx & 2) >> 1);
352 353

  
......
364 365
        dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
365 366
        dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
366 367
    }else{
367
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
368
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
368
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
369
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
369 370
    }
370 371
}
371 372

  
......
433 434

  
434 435
    if(s->mspel) {
435 436
        dxy = ((my & 3) << 2) | (mx & 3);
436
        dsp->put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd);
437
        v->vc1dsp.put_vc1_mspel_pixels_tab[dxy](s->dest[0] + off, srcY, s->linesize, v->rnd);
437 438
    } else { // hpel mc - always used for luma
438 439
        dxy = (my & 2) | ((mx & 2) >> 1);
439 440
        if(!v->rnd)
......
583 584
        dsp->put_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
584 585
        dsp->put_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
585 586
    }else{
586
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
587
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
587
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
588
        v->vc1dsp.put_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
588 589
    }
589 590
}
590 591

  
......
906 907

  
907 908
    if(s->mspel) {
908 909
        dxy = ((my & 3) << 2) | (mx & 3);
909
        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
910
        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
910
        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
911
        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
911 912
        srcY += s->linesize * 8;
912
        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
913
        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
913
        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
914
        v->vc1dsp.avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
914 915
    } else { // hpel mc
915 916
        dxy = (my & 2) | ((mx & 2) >> 1);
916 917

  
......
928 929
        dsp->avg_h264_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
929 930
        dsp->avg_h264_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
930 931
    }else{
931
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
932
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
932
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[1], srcU, s->uvlinesize, 8, uvmx, uvmy);
933
        v->vc1dsp.avg_no_rnd_vc1_chroma_pixels_tab[0](s->dest[2], srcV, s->uvlinesize, 8, uvmx, uvmy);
933 934
    }
934 935
}
935 936

  
......
2039 2040
        }
2040 2041
        if(!skip_block){
2041 2042
            if(i==1)
2042
                s->dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
2043
                v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
2043 2044
            else{
2044
                s->dsp.vc1_inv_trans_8x8(block);
2045
                v->vc1dsp.vc1_inv_trans_8x8(block);
2045 2046
                s->dsp.add_pixels_clamped(block, dst, linesize);
2046 2047
            }
2047 2048
            if(apply_filter && cbp_top  & 0xC)
2048
                s->dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
2049
                v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
2049 2050
            if(apply_filter && cbp_left & 0xA)
2050
                s->dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
2051
                v->vc1dsp.vc1_h_loop_filter8(dst, linesize, v->pq);
2051 2052
        }
2052 2053
        break;
2053 2054
    case TT_4X4:
......
2068 2069
            }
2069 2070
            if(!(subblkpat & (1 << (3 - j))) && !skip_block){
2070 2071
                if(i==1)
2071
                    s->dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2072
                    v->vc1dsp.vc1_inv_trans_4x4_dc(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2072 2073
                else
2073
                    s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2074
                    v->vc1dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off);
2074 2075
                if(apply_filter && (j&2 ? pat & (1<<(j-2)) : (cbp_top & (1 << (j + 2)))))
2075
                    s->dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2076
                    v->vc1dsp.vc1_v_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2076 2077
                if(apply_filter && (j&1 ? pat & (1<<(j-1)) : (cbp_left & (1 << (j + 1)))))
2077
                    s->dsp.vc1_h_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2078
                    v->vc1dsp.vc1_h_loop_filter4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, v->pq);
2078 2079
            }
2079 2080
        }
2080 2081
        break;
......
2096 2097
            }
2097 2098
            if(!(subblkpat & (1 << (1 - j))) && !skip_block){
2098 2099
                if(i==1)
2099
                    s->dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off);
2100
                    v->vc1dsp.vc1_inv_trans_8x4_dc(dst + j*4*linesize, linesize, block + off);
2100 2101
                else
2101
                    s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
2102
                    v->vc1dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off);
2102 2103
                if(apply_filter && j ? pat & 0x3 : (cbp_top & 0xC))
2103
                    s->dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq);
2104
                    v->vc1dsp.vc1_v_loop_filter8(dst + j*4*linesize, linesize, v->pq);
2104 2105
                if(apply_filter && cbp_left & (2 << j))
2105
                    s->dsp.vc1_h_loop_filter4(dst + j*4*linesize, linesize, v->pq);
2106
                    v->vc1dsp.vc1_h_loop_filter4(dst + j*4*linesize, linesize, v->pq);
2106 2107
            }
2107 2108
        }
2108 2109
        break;
......
2124 2125
            }
2125 2126
            if(!(subblkpat & (1 << (1 - j))) && !skip_block){
2126 2127
                if(i==1)
2127
                    s->dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off);
2128
                    v->vc1dsp.vc1_inv_trans_4x8_dc(dst + j*4, linesize, block + off);
2128 2129
                else
2129
                    s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
2130
                    v->vc1dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off);
2130 2131
                if(apply_filter && cbp_top & (2 << j))
2131
                    s->dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq);
2132
                    v->vc1dsp.vc1_v_loop_filter4(dst + j*4, linesize, v->pq);
2132 2133
                if(apply_filter && j ? pat & 0x5 : (cbp_left & 0xA))
2133
                    s->dsp.vc1_h_loop_filter8(dst + j*4, linesize, v->pq);
2134
                    v->vc1dsp.vc1_h_loop_filter8(dst + j*4, linesize, v->pq);
2134 2135
            }
2135 2136
        }
2136 2137
        break;
......
2232 2233

  
2233 2234
                    vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
2234 2235
                    if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2235
                    s->dsp.vc1_inv_trans_8x8(s->block[i]);
2236
                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2236 2237
                    if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2237 2238
                    s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2238 2239
                    if(v->pq >= 9 && v->overlap) {
2239 2240
                        if(v->c_avail)
2240
                            s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2241
                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2241 2242
                        if(v->a_avail)
2242
                            s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2243
                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2243 2244
                    }
2244 2245
                    if(apply_loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
2245 2246
                        int left_cbp, top_cbp;
......
2251 2252
                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2252 2253
                        }
2253 2254
                        if(left_cbp & 0xC)
2254
                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2255
                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2255 2256
                        if(top_cbp  & 0xA)
2256
                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2257
                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2257 2258
                    }
2258 2259
                    block_cbp |= 0xF << (i << 2);
2259 2260
                } else if(val) {
......
2268 2269
                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2269 2270
                        }
2270 2271
                        if(left_cbp & 0xC)
2271
                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2272
                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2272 2273
                        if(top_cbp  & 0xA)
2273
                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2274
                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2274 2275
                    }
2275 2276
                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp);
2276 2277
                    block_cbp |= pat << (i << 2);
......
2363 2364

  
2364 2365
                    vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset);
2365 2366
                    if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2366
                    s->dsp.vc1_inv_trans_8x8(s->block[i]);
2367
                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2367 2368
                    if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2368 2369
                    s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
2369 2370
                    if(v->pq >= 9 && v->overlap) {
2370 2371
                        if(v->c_avail)
2371
                            s->dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2372
                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2372 2373
                        if(v->a_avail)
2373
                            s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2374
                            v->vc1dsp.vc1_v_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2374 2375
                    }
2375 2376
                    if(v->s.loop_filter && s->mb_x && s->mb_x != (s->mb_width - 1) && s->mb_y && s->mb_y != (s->mb_height - 1)){
2376 2377
                        int left_cbp, top_cbp;
......
2382 2383
                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2383 2384
                        }
2384 2385
                        if(left_cbp & 0xC)
2385
                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2386
                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2386 2387
                        if(top_cbp  & 0xA)
2387
                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2388
                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2388 2389
                    }
2389 2390
                    block_cbp |= 0xF << (i << 2);
2390 2391
                } else if(is_coded[i]) {
......
2399 2400
                            top_cbp  = (i & 2) ? (cbp >> ((i-2)*4)) : (v->cbp[s->mb_x - s->mb_stride] >> ((i+2)*4));
2400 2401
                        }
2401 2402
                        if(left_cbp & 0xC)
2402
                            s->dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2403
                            v->vc1dsp.vc1_v_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2403 2404
                        if(top_cbp  & 0xA)
2404
                            s->dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2405
                            v->vc1dsp.vc1_h_loop_filter8(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize, v->pq);
2405 2406
                    }
2406 2407
                    pat = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), filter, left_cbp, top_cbp);
2407 2408
                    block_cbp |= pat << (i << 2);
......
2568 2569

  
2569 2570
            vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
2570 2571
            if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2571
            s->dsp.vc1_inv_trans_8x8(s->block[i]);
2572
            v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2572 2573
            if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2573 2574
            s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2574 2575
        } else if(val) {
......
2650 2651

  
2651 2652
                vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2);
2652 2653

  
2653
                s->dsp.vc1_inv_trans_8x8(s->block[k]);
2654
                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
2654 2655
                if(v->pq >= 9 && v->overlap) {
2655 2656
                    for(j = 0; j < 64; j++) s->block[k][j] += 128;
2656 2657
                }
......
2659 2660
            vc1_put_block(v, s->block);
2660 2661
            if(v->pq >= 9 && v->overlap) {
2661 2662
                if(s->mb_x) {
2662
                    s->dsp.vc1_h_overlap(s->dest[0], s->linesize);
2663
                    s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2663
                    v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
2664
                    v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2664 2665
                    if(!(s->flags & CODEC_FLAG_GRAY)) {
2665
                        s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2666
                        s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2666
                        v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2667
                        v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2667 2668
                    }
2668 2669
                }
2669
                s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2670
                s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2670
                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2671
                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2671 2672
                if(!s->first_slice_line) {
2672
                    s->dsp.vc1_v_overlap(s->dest[0], s->linesize);
2673
                    s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2673
                    v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
2674
                    v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2674 2675
                    if(!(s->flags & CODEC_FLAG_GRAY)) {
2675
                        s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2676
                        s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2676
                        v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2677
                        v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2677 2678
                    }
2678 2679
                }
2679
                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2680
                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2680
                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2681
                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2681 2682
            }
2682
            if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq);
2683
            if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
2683 2684

  
2684 2685
            if(get_bits_count(&s->gb) > v->bits) {
2685 2686
                ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
......
2790 2791

  
2791 2792
                vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
2792 2793

  
2793
                s->dsp.vc1_inv_trans_8x8(s->block[k]);
2794
                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
2794 2795
                for(j = 0; j < 64; j++) s->block[k][j] += 128;
2795 2796
            }
2796 2797

  
2797 2798
            vc1_put_block(v, s->block);
2798 2799
            if(overlap) {
2799 2800
                if(s->mb_x) {
2800
                    s->dsp.vc1_h_overlap(s->dest[0], s->linesize);
2801
                    s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2801
                    v->vc1dsp.vc1_h_overlap(s->dest[0], s->linesize);
2802
                    v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2802 2803
                    if(!(s->flags & CODEC_FLAG_GRAY)) {
2803
                        s->dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2804
                        s->dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2804
                        v->vc1dsp.vc1_h_overlap(s->dest[1], s->uvlinesize);
2805
                        v->vc1dsp.vc1_h_overlap(s->dest[2], s->uvlinesize);
2805 2806
                    }
2806 2807
                }
2807
                s->dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2808
                s->dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2808
                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8, s->linesize);
2809
                v->vc1dsp.vc1_h_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2809 2810
                if(!s->first_slice_line) {
2810
                    s->dsp.vc1_v_overlap(s->dest[0], s->linesize);
2811
                    s->dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2811
                    v->vc1dsp.vc1_v_overlap(s->dest[0], s->linesize);
2812
                    v->vc1dsp.vc1_v_overlap(s->dest[0] + 8, s->linesize);
2812 2813
                    if(!(s->flags & CODEC_FLAG_GRAY)) {
2813
                        s->dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2814
                        s->dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2814
                        v->vc1dsp.vc1_v_overlap(s->dest[1], s->uvlinesize);
2815
                        v->vc1dsp.vc1_v_overlap(s->dest[2], s->uvlinesize);
2815 2816
                    }
2816 2817
                }
2817
                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2818
                s->dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2818
                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize, s->linesize);
2819
                v->vc1dsp.vc1_v_overlap(s->dest[0] + 8 * s->linesize + 8, s->linesize);
2819 2820
            }
2820
            if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq);
2821
            if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
2821 2822

  
2822 2823
            if(get_bits_count(&s->gb) > v->bits) {
2823 2824
                ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
......
2929 2930
                av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
2930 2931
                return;
2931 2932
            }
2932
            if(v->s.loop_filter) vc1_loop_filter_iblk(s, v->pq);
2933
            if(v->s.loop_filter) vc1_loop_filter_iblk(v, v->pq);
2933 2934
        }
2934 2935
        if (!v->s.loop_filter)
2935 2936
            ff_draw_horiz_band(s, s->mb_y * 16, 16);
......
3023 3024
    if(ff_msmpeg4_decode_init(avctx) < 0)
3024 3025
        return -1;
3025 3026
    if (vc1_init_common(v) < 0) return -1;
3027
    ff_vc1dsp_init(&v->vc1dsp);
3026 3028
    for (i = 0; i < 64;  i++) {
3027 3029
#define transpose(x) ((x>>3) | ((x&7)<<3))
3028 3030
        v->zz_8x8[0][i] = transpose(wmv1_scantable[0][i]);
libavcodec/vc1dsp.c
25 25
 *
26 26
 */
27 27

  
28
#include "dsputil.h"
28
#include "vc1dsp.h"
29 29

  
30 30

  
31 31
/** Apply overlap transform to horizontal edge
......
612 612
PUT_VC1_MSPEL(2, 3)
613 613
PUT_VC1_MSPEL(3, 3)
614 614

  
615
av_cold void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
615
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
616
    const int A=(8-x)*(8-y);
617
    const int B=(  x)*(8-y);
618
    const int C=(8-x)*(  y);
619
    const int D=(  x)*(  y);
620
    int i;
621

  
622
    assert(x<8 && y<8 && x>=0 && y>=0);
623

  
624
    for(i=0; i<h; i++)
625
    {
626
        dst[0] = (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6;
627
        dst[1] = (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6;
628
        dst[2] = (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6;
629
        dst[3] = (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6;
630
        dst[4] = (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6;
631
        dst[5] = (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6;
632
        dst[6] = (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6;
633
        dst[7] = (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6;
634
        dst+= stride;
635
        src+= stride;
636
    }
637
}
638

  
639
#define avg2(a,b) ((a+b+1)>>1)
640
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
641
    const int A=(8-x)*(8-y);
642
    const int B=(  x)*(8-y);
643
    const int C=(8-x)*(  y);
644
    const int D=(  x)*(  y);
645
    int i;
646

  
647
    assert(x<8 && y<8 && x>=0 && y>=0);
648

  
649
    for(i=0; i<h; i++)
650
    {
651
        dst[0] = avg2(dst[0], ((A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + 32 - 4) >> 6));
652
        dst[1] = avg2(dst[1], ((A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + 32 - 4) >> 6));
653
        dst[2] = avg2(dst[2], ((A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + 32 - 4) >> 6));
654
        dst[3] = avg2(dst[3], ((A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + 32 - 4) >> 6));
655
        dst[4] = avg2(dst[4], ((A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + 32 - 4) >> 6));
656
        dst[5] = avg2(dst[5], ((A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + 32 - 4) >> 6));
657
        dst[6] = avg2(dst[6], ((A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + 32 - 4) >> 6));
658
        dst[7] = avg2(dst[7], ((A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + 32 - 4) >> 6));
659
        dst+= stride;
660
        src+= stride;
661
    }
662
}
663

  
664
av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
616 665
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
617 666
    dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
618 667
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
......
663 712
    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
664 713
    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
665 714
    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
715

  
716
    dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= put_no_rnd_vc1_chroma_mc8_c;
717
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= avg_no_rnd_vc1_chroma_mc8_c;
718

  
719
    if (HAVE_ALTIVEC)
720
        ff_vc1dsp_init_altivec(dsp);
721
    if (HAVE_MMX)
722
        ff_vc1dsp_init_mmx(dsp);
666 723
}
libavcodec/vc1dsp.h
1
/*
2
 * VC-1 and WMV3 decoder - DSP functions
3
 * Copyright (c) 2006 Konstantin Shishkov
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

  
22
/**
23
 * @file
24
 * VC-1 and WMV3 decoder
25
 *
26
 */
27

  
28
#ifndef AVCODEC_VC1DSP_H
29
#define AVCODEC_VC1DSP_H
30

  
31
#include "dsputil.h"
32

  
33
typedef struct VC1DSPContext {
34
    /* vc1 functions */
35
    void (*vc1_inv_trans_8x8)(DCTELEM *b);
36
    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
37
    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
38
    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);
39
    void (*vc1_inv_trans_8x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
40
    void (*vc1_inv_trans_8x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
41
    void (*vc1_inv_trans_4x8_dc)(uint8_t *dest, int line_size, DCTELEM *block);
42
    void (*vc1_inv_trans_4x4_dc)(uint8_t *dest, int line_size, DCTELEM *block);
43
    void (*vc1_v_overlap)(uint8_t* src, int stride);
44
    void (*vc1_h_overlap)(uint8_t* src, int stride);
45
    void (*vc1_v_loop_filter4)(uint8_t *src, int stride, int pq);
46
    void (*vc1_h_loop_filter4)(uint8_t *src, int stride, int pq);
47
    void (*vc1_v_loop_filter8)(uint8_t *src, int stride, int pq);
48
    void (*vc1_h_loop_filter8)(uint8_t *src, int stride, int pq);
49
    void (*vc1_v_loop_filter16)(uint8_t *src, int stride, int pq);
50
    void (*vc1_h_loop_filter16)(uint8_t *src, int stride, int pq);
51

  
52
    /* put 8x8 block with bicubic interpolation and quarterpel precision
53
     * last argument is actually round value instead of height
54
     */
55
    op_pixels_func put_vc1_mspel_pixels_tab[16];
56
    op_pixels_func avg_vc1_mspel_pixels_tab[16];
57

  
58
    /* This is really one func used in VC-1 decoding */
59
    h264_chroma_mc_func put_no_rnd_vc1_chroma_pixels_tab[3];
60
    h264_chroma_mc_func avg_no_rnd_vc1_chroma_pixels_tab[3];
61
} VC1DSPContext;
62

  
63
void ff_vc1dsp_init(VC1DSPContext* c);
64
void ff_vc1dsp_init_altivec(VC1DSPContext* c);
65
void ff_vc1dsp_init_mmx(VC1DSPContext* dsp);
66

  
67
#endif /* AVCODEC_VC1DSP_H */
libavcodec/x86/dsputil_mmx.c
1894 1894

  
1895 1895
void ff_put_h264_chroma_mc8_mmx_rnd   (uint8_t *dst, uint8_t *src,
1896 1896
                                       int stride, int h, int x, int y);
1897
void ff_put_vc1_chroma_mc8_mmx_nornd  (uint8_t *dst, uint8_t *src,
1898
                                       int stride, int h, int x, int y);
1899 1897
void ff_put_rv40_chroma_mc8_mmx       (uint8_t *dst, uint8_t *src,
1900 1898
                                       int stride, int h, int x, int y);
1901 1899
void ff_avg_h264_chroma_mc8_mmx2_rnd  (uint8_t *dst, uint8_t *src,
1902 1900
                                       int stride, int h, int x, int y);
1903
void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
1904
                                       int stride, int h, int x, int y);
1905 1901
void ff_avg_rv40_chroma_mc8_mmx2      (uint8_t *dst, uint8_t *src,
1906 1902
                                       int stride, int h, int x, int y);
1907 1903
void ff_avg_h264_chroma_mc8_3dnow_rnd (uint8_t *dst, uint8_t *src,
1908 1904
                                       int stride, int h, int x, int y);
1909
void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
1910
                                       int stride, int h, int x, int y);
1911 1905
void ff_avg_rv40_chroma_mc8_3dnow     (uint8_t *dst, uint8_t *src,
1912 1906
                                       int stride, int h, int x, int y);
1913 1907

  
......
1931 1925

  
1932 1926
void ff_put_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1933 1927
                                       int stride, int h, int x, int y);
1934
void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1935
                                       int stride, int h, int x, int y);
1936 1928
void ff_put_h264_chroma_mc4_ssse3     (uint8_t *dst, uint8_t *src,
1937 1929
                                       int stride, int h, int x, int y);
1938 1930

  
1939 1931
void ff_avg_h264_chroma_mc8_ssse3_rnd (uint8_t *dst, uint8_t *src,
1940 1932
                                       int stride, int h, int x, int y);
1941
void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
1942
                                       int stride, int h, int x, int y);
1943 1933
void ff_avg_h264_chroma_mc4_ssse3     (uint8_t *dst, uint8_t *src,
1944 1934
                                       int stride, int h, int x, int y);
1945 1935

  
......
2535 2525
#if HAVE_YASM
2536 2526
        c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_mmx_rnd;
2537 2527
        c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_mmx;
2538
        c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
2539 2528

  
2540 2529
        c->put_rv40_chroma_pixels_tab[0]= ff_put_rv40_chroma_mc8_mmx;
2541 2530
        c->put_rv40_chroma_pixels_tab[1]= ff_put_rv40_chroma_mc4_mmx;
......
2622 2611
            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_mmx2;
2623 2612
            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_mmx2;
2624 2613

  
2625
            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
2626

  
2627 2614
            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_mmx2_rnd;
2628 2615
            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_mmx2;
2629 2616
            c->avg_h264_chroma_pixels_tab[2]= ff_avg_h264_chroma_mc2_mmx2;
......
2636 2623
                c->add_hfyu_median_prediction = add_hfyu_median_prediction_cmov;
2637 2624
#endif
2638 2625

  
2639
            if (CONFIG_VC1_DECODER)
2640
                ff_vc1dsp_init_mmx(c, avctx);
2641

  
2642 2626
            c->add_png_paeth_prediction= add_png_paeth_prediction_mmx2;
2643 2627
        } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
2644 2628
            c->prefetch = prefetch_3dnow;
......
2695 2679
            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_3dnow_rnd;
2696 2680
            c->avg_h264_chroma_pixels_tab[1]= ff_avg_h264_chroma_mc4_3dnow;
2697 2681

  
2698
            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
2699

  
2700 2682
            c->avg_rv40_chroma_pixels_tab[0]= ff_avg_rv40_chroma_mc8_3dnow;
2701 2683
            c->avg_rv40_chroma_pixels_tab[1]= ff_avg_rv40_chroma_mc4_3dnow;
2702 2684
#endif
......
2745 2727
            H264_QPEL_FUNCS(3, 3, ssse3);
2746 2728
            c->add_png_paeth_prediction= add_png_paeth_prediction_ssse3;
2747 2729
#if HAVE_YASM
2748
            c->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
2749
            c->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
2750 2730
            c->put_h264_chroma_pixels_tab[0]= ff_put_h264_chroma_mc8_ssse3_rnd;
2751 2731
            c->avg_h264_chroma_pixels_tab[0]= ff_avg_h264_chroma_mc8_ssse3_rnd;
2752 2732
            c->put_h264_chroma_pixels_tab[1]= ff_put_h264_chroma_mc4_ssse3;
libavcodec/x86/dsputil_mmx.h
196 196
void ff_put_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
197 197
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride);
198 198

  
199
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
200 199
void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd);
201 200
void ff_avg_vc1_mspel_mc00_mmx2(uint8_t *dst, const uint8_t *src, int stride, int rnd);
202 201

  
libavcodec/x86/vc1dsp_mmx.c
28 28
#include "libavutil/x86_cpu.h"
29 29
#include "libavcodec/dsputil.h"
30 30
#include "dsputil_mmx.h"
31
#include "libavcodec/vc1dsp.h"
31 32

  
32 33
#define OP_PUT(S,D)
33 34
#define OP_AVG(S,D) "pavgb " #S ", " #D " \n\t"
......
712 713
    ff_vc1_h_loop_filter8_sse4(src,          stride, pq);
713 714
    ff_vc1_h_loop_filter8_sse4(src+8*stride, stride, pq);
714 715
}
716

  
715 717
#endif
716 718

  
717
void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx) {
719
void ff_put_vc1_chroma_mc8_mmx_nornd  (uint8_t *dst, uint8_t *src,
720
                                       int stride, int h, int x, int y);
721
void ff_avg_vc1_chroma_mc8_mmx2_nornd (uint8_t *dst, uint8_t *src,
722
                                       int stride, int h, int x, int y);
723
void ff_avg_vc1_chroma_mc8_3dnow_nornd(uint8_t *dst, uint8_t *src,
724
                                       int stride, int h, int x, int y);
725
void ff_put_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
726
                                       int stride, int h, int x, int y);
727
void ff_avg_vc1_chroma_mc8_ssse3_nornd(uint8_t *dst, uint8_t *src,
728
                                       int stride, int h, int x, int y);
729

  
730
void ff_vc1dsp_init_mmx(VC1DSPContext *dsp)
731
{
718 732
    int mm_flags = av_get_cpu_flags();
719 733

  
720
    dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
721
    dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
722
    dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
723
    dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
724

  
725
    dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
726
    dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
727
    dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
728
    dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
729

  
730
    dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
731
    dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
732
    dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
733
    dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
734

  
735
    dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
736
    dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
737
    dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
738
    dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
734
    if (mm_flags & AV_CPU_FLAG_MMX) {
735
        dsp->put_vc1_mspel_pixels_tab[ 0] = ff_put_vc1_mspel_mc00_mmx;
736
        dsp->put_vc1_mspel_pixels_tab[ 4] = put_vc1_mspel_mc01_mmx;
737
        dsp->put_vc1_mspel_pixels_tab[ 8] = put_vc1_mspel_mc02_mmx;
738
        dsp->put_vc1_mspel_pixels_tab[12] = put_vc1_mspel_mc03_mmx;
739

  
740
        dsp->put_vc1_mspel_pixels_tab[ 1] = put_vc1_mspel_mc10_mmx;
741
        dsp->put_vc1_mspel_pixels_tab[ 5] = put_vc1_mspel_mc11_mmx;
742
        dsp->put_vc1_mspel_pixels_tab[ 9] = put_vc1_mspel_mc12_mmx;
743
        dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_mmx;
744

  
745
        dsp->put_vc1_mspel_pixels_tab[ 2] = put_vc1_mspel_mc20_mmx;
746
        dsp->put_vc1_mspel_pixels_tab[ 6] = put_vc1_mspel_mc21_mmx;
747
        dsp->put_vc1_mspel_pixels_tab[10] = put_vc1_mspel_mc22_mmx;
748
        dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_mmx;
749

  
750
        dsp->put_vc1_mspel_pixels_tab[ 3] = put_vc1_mspel_mc30_mmx;
751
        dsp->put_vc1_mspel_pixels_tab[ 7] = put_vc1_mspel_mc31_mmx;
752
        dsp->put_vc1_mspel_pixels_tab[11] = put_vc1_mspel_mc32_mmx;
753
        dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_mmx;
754
    }
739 755

  
740 756
    if (mm_flags & AV_CPU_FLAG_MMX2){
741 757
        dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_mmx2;
......
775 791
#if HAVE_YASM
776 792
    if (mm_flags & AV_CPU_FLAG_MMX) {
777 793
        ASSIGN_LF(mmx);
794
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_mmx_nornd;
778 795
    }
779 796
    return;
780 797
    if (mm_flags & AV_CPU_FLAG_MMX2) {
781 798
        ASSIGN_LF(mmx2);
799
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_mmx2_nornd;
800
    } else if (mm_flags & AV_CPU_FLAG_3DNOW) {
801
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_3dnow_nornd;
782 802
    }
803

  
783 804
    if (mm_flags & AV_CPU_FLAG_SSE2) {
784 805
        dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_sse2;
785 806
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse2;
......
788 809
    }
789 810
    if (mm_flags & AV_CPU_FLAG_SSSE3) {
790 811
        ASSIGN_LF(ssse3);
812
        dsp->put_no_rnd_vc1_chroma_pixels_tab[0]= ff_put_vc1_chroma_mc8_ssse3_nornd;
813
        dsp->avg_no_rnd_vc1_chroma_pixels_tab[0]= ff_avg_vc1_chroma_mc8_ssse3_nornd;
791 814
    }
792 815
    if (mm_flags & AV_CPU_FLAG_SSE4) {
793 816
        dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_sse4;

Also available in: Unified diff