Revision f8bed30d

View differences:

libavcodec/ppc/vc1dsp_altivec.c
130 130

  
131 131
/** Do inverse transform on 8x8 block
132 132
*/
133
static void vc1_inv_trans_8x8_altivec(DCTELEM block[64])
133
static void vc1_inv_trans_8x8_altivec(DCTELEM block[64],
134
                                      int sign, int rangered)
134 135
{
135 136
    vector signed short src0, src1, src2, src3, src4, src5, src6, src7;
136 137
    vector signed int s0, s1, s2, s3, s4, s5, s6, s7;
......
144 145
    const vector unsigned int vec_2 = vec_splat_u32(2);
145 146
    const vector  signed int vec_1s = vec_splat_s32(1);
146 147
    const vector unsigned int vec_1 = vec_splat_u32(1);
147

  
148
    const vector unsigned short rangered_shift = vec_splat_u16(1);
149
    const vector   signed short signed_bias = vec_sl(vec_splat_u16(4),
150
                                                     vec_splat_u16(4));
148 151

  
149 152
    src0 = vec_ld(  0, block);
150 153
    src1 = vec_ld( 16, block);
......
214 217
    src6 = vec_pack(sE, s6);
215 218
    src7 = vec_pack(sF, s7);
216 219

  
220
    /* Optional range-reduction adjustment of the eight packed result vectors.
     * AltiVec intrinsics return their result and leave their operands
     * untouched, so every vec_sub()/vec_sl() value must be assigned back;
     * otherwise the adjustment is silently dropped. */
    if (rangered) {
        if (!sign) {
            /* Unsigned output path: subtract signed_bias (4 << 4 in every
             * lane) before scaling. */
            src0 = vec_sub(src0, signed_bias);
            src1 = vec_sub(src1, signed_bias);
            src2 = vec_sub(src2, signed_bias);
            src3 = vec_sub(src3, signed_bias);
            src4 = vec_sub(src4, signed_bias);
            src5 = vec_sub(src5, signed_bias);
            src6 = vec_sub(src6, signed_bias);
            src7 = vec_sub(src7, signed_bias);
        }
        /* Shift every lane left by rangered_shift (one bit). */
        src0 = vec_sl(src0, rangered_shift);
        src1 = vec_sl(src1, rangered_shift);
        src2 = vec_sl(src2, rangered_shift);
        src3 = vec_sl(src3, rangered_shift);
        src4 = vec_sl(src4, rangered_shift);
        src5 = vec_sl(src5, rangered_shift);
        src6 = vec_sl(src6, rangered_shift);
        src7 = vec_sl(src7, rangered_shift);
    }
240

  
217 241
    vec_st(src0,  0, block);
218 242
    vec_st(src1, 16, block);
219 243
    vec_st(src2, 32, block);
......
224 248
    vec_st(src7,112, block);
225 249
}
226 250

  
251
/** 8x8 inverse transform of b without range-reduction adjustment; the
 *  transformed block is then handed to ff_add_pixels_clamped_c() together
 *  with dest and stride.
 */
static void vc1_inv_trans_8x8_add_altivec(uint8_t *dest, int stride, DCTELEM *b)
{
    const int sign     = 0;
    const int rangered = 0;

    vc1_inv_trans_8x8_altivec(b, sign, rangered);
    ff_add_pixels_clamped_c(b, dest, stride);
}
256

  
257
/** 8x8 inverse transform of b with the sign flag set and range reduction
 *  off; the transformed block is then handed to
 *  ff_put_signed_pixels_clamped_c() together with dest and stride.
 */
static void vc1_inv_trans_8x8_put_signed_altivec(uint8_t *dest, int stride, DCTELEM *b)
{
    const int sign     = 1;
    const int rangered = 0;

    vc1_inv_trans_8x8_altivec(b, sign, rangered);
    ff_put_signed_pixels_clamped_c(b, dest, stride);
}
262

  
263
/** 8x8 inverse transform of b with both the sign and the range-reduction
 *  flags set; the transformed block is then handed to
 *  ff_put_signed_pixels_clamped_c() together with dest and stride.
 */
static void vc1_inv_trans_8x8_put_signed_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b)
{
    const int sign     = 1;
    const int rangered = 1;

    vc1_inv_trans_8x8_altivec(b, sign, rangered);
    ff_put_signed_pixels_clamped_c(b, dest, stride);
}
268

  
269
/** 8x8 inverse transform of b without range-reduction adjustment; the
 *  transformed block is then handed to ff_put_pixels_clamped_c() together
 *  with dest and stride.
 */
static void vc1_inv_trans_8x8_put_altivec(uint8_t *dest, int stride, DCTELEM *b)
{
    const int sign     = 0;
    const int rangered = 0;

    vc1_inv_trans_8x8_altivec(b, sign, rangered);
    ff_put_pixels_clamped_c(b, dest, stride);
}
274

  
275
/** 8x8 inverse transform of b with range reduction requested and the sign
 *  flag clear; the transformed block is then handed to
 *  ff_put_pixels_clamped_c() together with dest and stride.
 */
static void vc1_inv_trans_8x8_put_rangered_altivec(uint8_t *dest, int stride, DCTELEM *b)
{
    const int sign     = 0;
    const int rangered = 1;

    vc1_inv_trans_8x8_altivec(b, sign, rangered);
    ff_put_pixels_clamped_c(b, dest, stride);
}
280

  
227 281
/** Do inverse transform on 8x4 part of block
228 282
*/
229 283
static void vc1_inv_trans_8x4_altivec(uint8_t *dest, int stride, DCTELEM *block)
......
342 396
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
343 397
        return;
344 398

  
345
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec;
399
    dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_altivec;
400
    dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_altivec;
401
    dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_altivec;
402
    dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_altivec;
403
    dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_altivec;
346 404
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec;
347 405
    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_altivec;
348 406
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_altivec;
libavcodec/vc1.c
280 280

  
281 281
static int decode_sequence_header_adv(VC1Context *v, GetBitContext *gb);
282 282

  
283
/** Run ff_simple_idct() on block, apply the range-reduction adjustment
 *  (subtract 64, then double) to each of the 64 coefficients, and pass the
 *  result to ff_put_pixels_clamped_c() together with dest and line_size.
 */
static void simple_idct_put_rangered(uint8_t *dest, int line_size, DCTELEM *block)
{
    int i;

    ff_simple_idct(block);
    /* Multiply instead of shifting: (block[i] - 64) can be negative, and
     * left-shifting a negative value is undefined behaviour in C. */
    for (i = 0; i < 64; i++)
        block[i] = (block[i] - 64) * 2;
    ff_put_pixels_clamped_c(block, dest, line_size);
}
290

  
291
/** Run ff_simple_idct() on block, then pass the block to
 *  ff_put_signed_pixels_clamped_c() together with dest and line_size.
 *  No range-reduction adjustment is applied between the two calls.
 */
static void simple_idct_put_signed(uint8_t *dest, int line_size, DCTELEM *block)
292
{
293
    ff_simple_idct(block);
294
    ff_put_signed_pixels_clamped_c(block, dest, line_size);
295
}
296

  
297
/** Run ff_simple_idct() on block, double each of the 64 coefficients
 *  (range-reduction adjustment), and pass the result to
 *  ff_put_signed_pixels_clamped_c() together with dest and line_size.
 */
static void simple_idct_put_signed_rangered(uint8_t *dest, int line_size, DCTELEM *block)
{
    int i;

    ff_simple_idct(block);
    /* Multiply instead of shifting: coefficients can be negative, and
     * left-shifting a negative value is undefined behaviour in C. */
    for (i = 0; i < 64; i++)
        block[i] *= 2;
    ff_put_signed_pixels_clamped_c(block, dest, line_size);
}
304

  
283 305
/**
284 306
 * Decode Simple/Main Profiles sequence header
285 307
 * @see Figure 7-8, p16-17
......
337 359
    v->res_fasttx = get_bits1(gb);
338 360
    if (!v->res_fasttx)
339 361
    {
340
        v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct;
362
        v->vc1dsp.vc1_inv_trans_8x8_add = ff_simple_idct_add;
363
        v->vc1dsp.vc1_inv_trans_8x8_put[0] = ff_simple_idct_put;
364
        v->vc1dsp.vc1_inv_trans_8x8_put[1] = simple_idct_put_rangered;
365
        v->vc1dsp.vc1_inv_trans_8x8_put_signed[0] = simple_idct_put_signed;
366
        v->vc1dsp.vc1_inv_trans_8x8_put_signed[1] = simple_idct_put_signed_rangered;
341 367
        v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
342 368
        v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
343 369
        v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
libavcodec/vc1dec.c
2009 2009
            if(i==1)
2010 2010
                v->vc1dsp.vc1_inv_trans_8x8_dc(dst, linesize, block);
2011 2011
            else{
2012
                v->vc1dsp.vc1_inv_trans_8x8(block);
2013
                s->dsp.add_pixels_clamped(block, dst, linesize);
2012
                v->vc1dsp.vc1_inv_trans_8x8_add(dst, linesize, block);
2014 2013
            }
2015 2014
            if(apply_filter && cbp_top  & 0xC)
2016 2015
                v->vc1dsp.vc1_v_loop_filter8(dst, linesize, v->pq);
......
2117 2116
{
2118 2117
    MpegEncContext *s = &v->s;
2119 2118
    GetBitContext *gb = &s->gb;
2120
    int i, j;
2119
    int i;
2121 2120
    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
2122 2121
    int cbp; /* cbp decoding stuff */
2123 2122
    int mqdiff, mquant; /* MB quantization */
......
2149 2148
    {
2150 2149
        if (!skipped)
2151 2150
        {
2151
            vc1_idct_func idct8x8_fn;
2152

  
2152 2153
            GET_MVDATA(dmv_x, dmv_y);
2153 2154

  
2154 2155
            if (s->mb_intra) {
......
2183 2184
                                VC1_TTMB_VLC_BITS, 2);
2184 2185
            if(!s->mb_intra) vc1_mc_1mv(v, 0);
2185 2186
            dst_idx = 0;
2187
            idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
2186 2188
            for (i=0; i<6; i++)
2187 2189
            {
2188 2190
                s->dc_val[0][s->block_index[i]] = 0;
......
2200 2202

  
2201 2203
                    vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
2202 2204
                    if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2203
                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2204
                    if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2205
                    s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2205
                    idct8x8_fn(s->dest[dst_idx] + off,
2206
                               i & 4 ? s->uvlinesize : s->linesize,
2207
                               s->block[i]);
2206 2208
                    if(v->pq >= 9 && v->overlap) {
2207 2209
                        if(v->c_avail)
2208 2210
                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
......
2267 2269
        {
2268 2270
            int intra_count = 0, coded_inter = 0;
2269 2271
            int is_intra[6], is_coded[6];
2272
            vc1_idct_func idct8x8_fn;
2270 2273
            /* Get CBPCY */
2271 2274
            cbp = get_vlc2(&v->s.gb, v->cbpcy_vlc->table, VC1_CBPCY_P_VLC_BITS, 2);
2272 2275
            for (i=0; i<6; i++)
......
2316 2319
            }
2317 2320
            if (!v->ttmbf && coded_inter)
2318 2321
                ttmb = get_vlc2(gb, ff_vc1_ttmb_vlc[v->tt_index].table, VC1_TTMB_VLC_BITS, 2);
2322
            idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
2319 2323
            for (i=0; i<6; i++)
2320 2324
            {
2321 2325
                dst_idx += i >> 2;
......
2331 2335

  
2332 2336
                    vc1_decode_intra_block(v, s->block[i], i, is_coded[i], mquant, (i&4)?v->codingset2:v->codingset);
2333 2337
                    if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2334
                    v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2335
                    if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2336
                    s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize);
2338
                    idct8x8_fn(s->dest[dst_idx] + off,
2339
                               (i&4)?s->uvlinesize:s->linesize,
2340
                               s->block[i]);
2337 2341
                    if(v->pq >= 9 && v->overlap) {
2338 2342
                        if(v->c_avail)
2339 2343
                            v->vc1dsp.vc1_h_overlap(s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
......
2409 2413
{
2410 2414
    MpegEncContext *s = &v->s;
2411 2415
    GetBitContext *gb = &s->gb;
2412
    int i, j;
2416
    int i;
2413 2417
    int mb_pos = s->mb_x + s->mb_y * s->mb_stride;
2414 2418
    int cbp = 0; /* cbp decoding stuff */
2415 2419
    int mqdiff, mquant; /* MB quantization */
......
2422 2426
    int skipped, direct;
2423 2427
    int dmv_x[2], dmv_y[2];
2424 2428
    int bmvtype = BMV_TYPE_BACKWARD;
2429
    vc1_idct_func idct8x8_fn;
2425 2430

  
2426 2431
    mquant = v->pq; /* Loosy initialization */
2427 2432
    s->mb_intra = 0;
......
2519 2524
        }
2520 2525
    }
2521 2526
    dst_idx = 0;
2527
    idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
2522 2528
    for (i=0; i<6; i++)
2523 2529
    {
2524 2530
        s->dc_val[0][s->block_index[i]] = 0;
......
2536 2542

  
2537 2543
            vc1_decode_intra_block(v, s->block[i], i, val, mquant, (i&4)?v->codingset2:v->codingset);
2538 2544
            if((i>3) && (s->flags & CODEC_FLAG_GRAY)) continue;
2539
            v->vc1dsp.vc1_inv_trans_8x8(s->block[i]);
2540
            if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1;
2541
            s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, i & 4 ? s->uvlinesize : s->linesize);
2545
            idct8x8_fn(s->dest[dst_idx] + off,
2546
                       i & 4 ? s->uvlinesize : s->linesize,
2547
                       s->block[i]);
2542 2548
        } else if(val) {
2543 2549
            vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY), 0, 0, 0);
2544 2550
            if(!v->ttmbf && ttmb < 8) ttmb = -1;
......
2551 2557
 */
2552 2558
static void vc1_decode_i_blocks(VC1Context *v)
2553 2559
{
2554
    int k, j;
2560
    int k;
2555 2561
    MpegEncContext *s = &v->s;
2556 2562
    int cbp, val;
2557 2563
    uint8_t *coded_val;
2558 2564
    int mb_pos;
2565
    vc1_idct_func idct8x8_fn;
2559 2566

  
2560 2567
    /* select codingmode used for VLC tables selection */
2561 2568
    switch(v->y_ac_table_index){
......
2590 2597
    s->mb_x = s->mb_y = 0;
2591 2598
    s->mb_intra = 1;
2592 2599
    s->first_slice_line = 1;
2600
    if(v->pq >= 9 && v->overlap) {
2601
        idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[!!v->rangeredfrm];
2602
    } else
2603
        idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put[!!v->rangeredfrm];
2593 2604
    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
2594 2605
        s->mb_x = 0;
2595 2606
        ff_init_block_index(s);
......
2626 2637
                vc1_decode_i_block(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2);
2627 2638

  
2628 2639
                if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue;
2629
                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
2630
                if(v->pq >= 9 && v->overlap) {
2631
                    if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] <<= 1;
2632
                    s->dsp.put_signed_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize);
2633
                } else {
2634
                    if (v->rangeredfrm) for(j = 0; j < 64; j++) s->block[k][j] = (s->block[k][j] - 64) << 1;
2635
                    s->dsp.put_pixels_clamped(s->block[k], dst[k], k & 4 ? s->uvlinesize : s->linesize);
2636
                }
2640
                idct8x8_fn(dst[k],
2641
                           k & 4 ? s->uvlinesize : s->linesize,
2642
                           s->block[k]);
2637 2643
            }
2638 2644

  
2639 2645
            if(v->pq >= 9 && v->overlap) {
......
2691 2697
    int mqdiff;
2692 2698
    int overlap;
2693 2699
    GetBitContext *gb = &s->gb;
2700
    vc1_idct_func idct8x8_fn;
2694 2701

  
2695 2702
    /* select codingmode used for VLC tables selection */
2696 2703
    switch(v->y_ac_table_index){
......
2721 2728
    s->mb_x = s->mb_y = 0;
2722 2729
    s->mb_intra = 1;
2723 2730
    s->first_slice_line = 1;
2731
    idct8x8_fn = v->vc1dsp.vc1_inv_trans_8x8_put_signed[0];
2724 2732
    for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
2725 2733
        s->mb_x = 0;
2726 2734
        ff_init_block_index(s);
......
2777 2785
                vc1_decode_i_block_adv(v, s->block[k], k, val, (k<4)? v->codingset : v->codingset2, mquant);
2778 2786

  
2779 2787
                if (k > 3 && (s->flags & CODEC_FLAG_GRAY)) continue;
2780
                v->vc1dsp.vc1_inv_trans_8x8(s->block[k]);
2781
                s->dsp.put_signed_pixels_clamped(s->block[k], dst[k],
2782
                                                 k & 4 ? s->uvlinesize : s->linesize);
2788
                idct8x8_fn(dst[k],
2789
                           k & 4 ? s->uvlinesize : s->linesize,
2790
                           s->block[k]);
2783 2791
            }
2784 2792

  
2785 2793
            if(overlap) {
libavcodec/vc1dsp.c
199 199
    }
200 200
}
201 201

  
202
static void vc1_inv_trans_8x8_c(DCTELEM block[64])
202
static av_always_inline void vc1_inv_trans_8x8_c(DCTELEM block[64], int shl, int sub)
203 203
{
204 204
    int i;
205 205
    register int t1,t2,t3,t4,t5,t6,t7,t8;
......
254 254
        t3 =  9 * src[ 8] - 16 * src[24] +  4 * src[40] + 15 * src[56];
255 255
        t4 =  4 * src[ 8] -  9 * src[24] + 15 * src[40] - 16 * src[56];
256 256

  
257
        dst[ 0] = (t5 + t1) >> 7;
258
        dst[ 8] = (t6 + t2) >> 7;
259
        dst[16] = (t7 + t3) >> 7;
260
        dst[24] = (t8 + t4) >> 7;
261
        dst[32] = (t8 - t4 + 1) >> 7;
262
        dst[40] = (t7 - t3 + 1) >> 7;
263
        dst[48] = (t6 - t2 + 1) >> 7;
264
        dst[56] = (t5 - t1 + 1) >> 7;
257
        dst[ 0] = (((t5 + t1    ) >> 7) - sub) << shl;
258
        dst[ 8] = (((t6 + t2    ) >> 7) - sub) << shl;
259
        dst[16] = (((t7 + t3    ) >> 7) - sub) << shl;
260
        dst[24] = (((t8 + t4    ) >> 7) - sub) << shl;
261
        dst[32] = (((t8 - t4 + 1) >> 7) - sub) << shl;
262
        dst[40] = (((t7 - t3 + 1) >> 7) - sub) << shl;
263
        dst[48] = (((t6 - t2 + 1) >> 7) - sub) << shl;
264
        dst[56] = (((t5 - t1 + 1) >> 7) - sub) << shl;
265 265

  
266 266
        src++;
267 267
        dst++;
268 268
    }
269 269
}
270 270

  
271
/** 8x8 inverse transform of block with no subtraction and no shift applied
 *  to the result; the block is then handed to ff_add_pixels_clamped_c()
 *  together with dest and linesize.
 */
static void vc1_inv_trans_8x8_add_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    const int shl = 0;
    const int sub = 0;

    vc1_inv_trans_8x8_c(block, shl, sub);
    ff_add_pixels_clamped_c(block, dest, linesize);
}
276

  
277
/** 8x8 inverse transform of block with no subtraction and no shift applied
 *  to the result; the block is then handed to
 *  ff_put_signed_pixels_clamped_c() together with dest and linesize.
 */
static void vc1_inv_trans_8x8_put_signed_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    const int shl = 0;
    const int sub = 0;

    vc1_inv_trans_8x8_c(block, shl, sub);
    ff_put_signed_pixels_clamped_c(block, dest, linesize);
}
282

  
283
/** 8x8 inverse transform of block whose results are shifted left by one
 *  bit with nothing subtracted; the block is then handed to
 *  ff_put_signed_pixels_clamped_c() together with dest and linesize.
 */
static void vc1_inv_trans_8x8_put_signed_rangered_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    const int shl = 1;
    const int sub = 0;

    vc1_inv_trans_8x8_c(block, shl, sub);
    ff_put_signed_pixels_clamped_c(block, dest, linesize);
}
288

  
289
/** 8x8 inverse transform of block with no subtraction and no shift applied
 *  to the result; the block is then handed to ff_put_pixels_clamped_c()
 *  together with dest and linesize.
 */
static void vc1_inv_trans_8x8_put_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    const int shl = 0;
    const int sub = 0;

    vc1_inv_trans_8x8_c(block, shl, sub);
    ff_put_pixels_clamped_c(block, dest, linesize);
}
294

  
295
/** 8x8 inverse transform of block whose results have 64 subtracted and are
 *  then shifted left by one bit; the block is then handed to
 *  ff_put_pixels_clamped_c() together with dest and linesize.
 */
static void vc1_inv_trans_8x8_put_rangered_c(uint8_t *dest, int linesize, DCTELEM *block)
{
    const int shl = 1;
    const int sub = 64;

    vc1_inv_trans_8x8_c(block, shl, sub);
    ff_put_pixels_clamped_c(block, dest, linesize);
}
300

  
271 301
/** Do inverse transform on 8x4 part of block
272 302
*/
273 303
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, DCTELEM *block)
......
662 692
}
663 693

  
664 694
av_cold void ff_vc1dsp_init(VC1DSPContext* dsp) {
665
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;
695
    dsp->vc1_inv_trans_8x8_add = vc1_inv_trans_8x8_add_c;
696
    dsp->vc1_inv_trans_8x8_put_signed[0] = vc1_inv_trans_8x8_put_signed_c;
697
    dsp->vc1_inv_trans_8x8_put_signed[1] = vc1_inv_trans_8x8_put_signed_rangered_c;
698
    dsp->vc1_inv_trans_8x8_put[0] = vc1_inv_trans_8x8_put_c;
699
    dsp->vc1_inv_trans_8x8_put[1] = vc1_inv_trans_8x8_put_rangered_c;
666 700
    dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
667 701
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
668 702
    dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
libavcodec/vc1dsp.h
30 30

  
31 31
#include "dsputil.h"
32 32

  
33
typedef void (*vc1_idct_func)(uint8_t *dest, int line_size, DCTELEM *block);
34

  
33 35
typedef struct VC1DSPContext {
34 36
    /* vc1 functions */
35
    void (*vc1_inv_trans_8x8)(DCTELEM *b);
37
    vc1_idct_func vc1_inv_trans_8x8_add;
38
    vc1_idct_func vc1_inv_trans_8x8_put_signed[2];
39
    vc1_idct_func vc1_inv_trans_8x8_put[2];
36 40
    void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block);
37 41
    void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block);
38 42
    void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block);

Also available in: Unified diff