Revision 3d32b429 libavcodec/vp3.c

View differences:

libavcodec/vp3.c
17 17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18 18
 *
19 19
 * VP3 Video Decoder by Mike Melanson (melanson@pcisys.net)
20
 * For more information about the VP3 coding process, visit:
21
 *   http://www.pcisys.net/~melanson/codecs/
20 22
 *
21 23
 */
22 24

  
......
288 290
} Vp3DecodeContext;
289 291

  
290 292
/************************************************************************
 * VP3 I/DCT
 ************************************************************************/

/*
 * Rounding offset added to the column-pass results before they are
 * shifted right by 4 (i.e. half of the divisor 16).
 */
#define IdctAdjustBeforeShift 8

/*
 * Fixed-point transform constants, scaled by 65536 (16 fractional bits).
 * xCkS(8-k) is cos(k * pi / 16), which equals sin((8 - k) * pi / 16),
 * rounded to the nearest integer after scaling.  Products with these
 * constants are shifted right by 16 to drop the fractional bits again.
 */
#define xC1S7 64277
#define xC2S6 60547
#define xC3S5 54491
#define xC4S4 46341
#define xC5S3 36410
#define xC6S2 25080
#define xC7S1 12785
304

  
305
void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, 
306
    int16_t *output_data)
307
{
308
    int32_t intermediate_data[64];
309
    int32_t *ip = intermediate_data;
310
    int16_t *op = output_data;
311

  
312
    int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
313
    int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
314
    int32_t t1, t2;
315

  
316
    int i, j;
317

  
318
    debug_idct("raw coefficient block:\n");
319
    for (i = 0; i < 8; i++) {
320
        for (j = 0; j < 8; j++) {
321
            debug_idct(" %5d", input_data[i * 8 + j]);
322
        }
323
        debug_idct("\n");
324
    }
325
    debug_idct("\n");
326

  
327
    for (i = 0; i < 64; i++) {
328
        j = dezigzag_index[i];
329
        intermediate_data[j] = dequant_matrix[i] * input_data[i];
330
    }
331

  
332
    debug_idct("dequantized block:\n");
333
    for (i = 0; i < 8; i++) {
334
        for (j = 0; j < 8; j++) {
335
            debug_idct(" %5d", intermediate_data[i * 8 + j]);
336
        }
337
        debug_idct("\n");
338
    }
339
    debug_idct("\n");
340

  
341
    /* Inverse DCT on the rows now */
342
    for (i = 0; i < 8; i++) {
343
        /* Check for non-zero values */
344
        if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
345
            t1 = (int32_t)(xC1S7 * ip[1]);
346
            t2 = (int32_t)(xC7S1 * ip[7]);
347
            t1 >>= 16;
348
            t2 >>= 16;
349
            _A = t1 + t2;
350

  
351
            t1 = (int32_t)(xC7S1 * ip[1]);
352
            t2 = (int32_t)(xC1S7 * ip[7]);
353
            t1 >>= 16;
354
            t2 >>= 16;
355
            _B = t1 - t2;
356

  
357
            t1 = (int32_t)(xC3S5 * ip[3]);
358
            t2 = (int32_t)(xC5S3 * ip[5]);
359
            t1 >>= 16;
360
            t2 >>= 16;
361
            _C = t1 + t2;
362

  
363
            t1 = (int32_t)(xC3S5 * ip[5]);
364
            t2 = (int32_t)(xC5S3 * ip[3]);
365
            t1 >>= 16;
366
            t2 >>= 16;
367
            _D = t1 - t2;
368

  
369

  
370
            t1 = (int32_t)(xC4S4 * (_A - _C));
371
            t1 >>= 16;
372
            _Ad = t1;
373

  
374
            t1 = (int32_t)(xC4S4 * (_B - _D));
375
            t1 >>= 16;
376
            _Bd = t1;
377

  
378

  
379
            _Cd = _A + _C;
380
            _Dd = _B + _D;
381

  
382
            t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
383
            t1 >>= 16;
384
            _E = t1;
385

  
386
            t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
387
            t1 >>= 16;
388
            _F = t1;
389

  
390
            t1 = (int32_t)(xC2S6 * ip[2]);
391
            t2 = (int32_t)(xC6S2 * ip[6]);
392
            t1 >>= 16;
393
            t2 >>= 16;
394
            _G = t1 + t2;
395

  
396
            t1 = (int32_t)(xC6S2 * ip[2]);
397
            t2 = (int32_t)(xC2S6 * ip[6]);
398
            t1 >>= 16;
399
            t2 >>= 16;
400
            _H = t1 - t2;
401

  
402

  
403
            _Ed = _E - _G;
404
            _Gd = _E + _G;
405

  
406
            _Add = _F + _Ad;
407
            _Bdd = _Bd - _H;
408

  
409
            _Fd = _F - _Ad;
410
            _Hd = _Bd + _H;
411

  
412
            /*  Final sequence of operations over-write original inputs. */
413
            ip[0] = (int16_t)((_Gd + _Cd )   >> 0);
414
            ip[7] = (int16_t)((_Gd - _Cd )   >> 0);
415

  
416
            ip[1] = (int16_t)((_Add + _Hd )  >> 0);
417
            ip[2] = (int16_t)((_Add - _Hd )  >> 0);
418

  
419
            ip[3] = (int16_t)((_Ed + _Dd )   >> 0);
420
            ip[4] = (int16_t)((_Ed - _Dd )   >> 0);
421

  
422
            ip[5] = (int16_t)((_Fd + _Bdd )  >> 0);
423
            ip[6] = (int16_t)((_Fd - _Bdd )  >> 0);
424

  
425
        }
426

  
427
        ip += 8;            /* next row */
428
    }
429

  
430
    ip = intermediate_data;
431

  
432
    for ( i = 0; i < 8; i++) {
433
        /* Check for non-zero values (bitwise or faster than ||) */
434
        if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
435
             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
436

  
437
            t1 = (int32_t)(xC1S7 * ip[1*8]);
438
            t2 = (int32_t)(xC7S1 * ip[7*8]);
439
            t1 >>= 16;
440
            t2 >>= 16;
441
            _A = t1 + t2;
442

  
443
            t1 = (int32_t)(xC7S1 * ip[1*8]);
444
            t2 = (int32_t)(xC1S7 * ip[7*8]);
445
            t1 >>= 16;
446
            t2 >>= 16;
447
            _B = t1 - t2;
448

  
449
            t1 = (int32_t)(xC3S5 * ip[3*8]);
450
            t2 = (int32_t)(xC5S3 * ip[5*8]);
451
            t1 >>= 16;
452
            t2 >>= 16;
453
            _C = t1 + t2;
454

  
455
            t1 = (int32_t)(xC3S5 * ip[5*8]);
456
            t2 = (int32_t)(xC5S3 * ip[3*8]);
457
            t1 >>= 16;
458
            t2 >>= 16;
459
            _D = t1 - t2;
460

  
461

  
462
            t1 = (int32_t)(xC4S4 * (_A - _C));
463
            t1 >>= 16;
464
            _Ad = t1;
465

  
466
            t1 = (int32_t)(xC4S4 * (_B - _D));
467
            t1 >>= 16;
468
            _Bd = t1;
469

  
470

  
471
            _Cd = _A + _C;
472
            _Dd = _B + _D;
473

  
474
            t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
475
            t1 >>= 16;
476
            _E = t1;
477

  
478
            t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
479
            t1 >>= 16;
480
            _F = t1;
481

  
482
            t1 = (int32_t)(xC2S6 * ip[2*8]);
483
            t2 = (int32_t)(xC6S2 * ip[6*8]);
484
            t1 >>= 16;
485
            t2 >>= 16;
486
            _G = t1 + t2;
487

  
488
            t1 = (int32_t)(xC6S2 * ip[2*8]);
489
            t2 = (int32_t)(xC2S6 * ip[6*8]);
490
            t1 >>= 16;
491
            t2 >>= 16;
492
            _H = t1 - t2;
493

  
494

  
495
            _Ed = _E - _G;
496
            _Gd = _E + _G;
497

  
498
            _Add = _F + _Ad;
499
            _Bdd = _Bd - _H;
500

  
501
            _Fd = _F - _Ad;
502
            _Hd = _Bd + _H;
503

  
504
            _Gd += IdctAdjustBeforeShift;
505
            _Add += IdctAdjustBeforeShift;
506
            _Ed += IdctAdjustBeforeShift;
507
            _Fd += IdctAdjustBeforeShift;
508

  
509
            /* Final sequence of operations over-write original inputs. */
510
            op[0*8] = (int16_t)((_Gd + _Cd )   >> 4);
511
            op[7*8] = (int16_t)((_Gd - _Cd )   >> 4);
512

  
513
            op[1*8] = (int16_t)((_Add + _Hd )  >> 4);
514
            op[2*8] = (int16_t)((_Add - _Hd )  >> 4);
515

  
516
            op[3*8] = (int16_t)((_Ed + _Dd )   >> 4);
517
            op[4*8] = (int16_t)((_Ed - _Dd )   >> 4);
518

  
519
            op[5*8] = (int16_t)((_Fd + _Bdd )  >> 4);
520
            op[6*8] = (int16_t)((_Fd - _Bdd )  >> 4);
521

  
522
        } else {
523

  
524
            op[0*8] = 0;
525
            op[7*8] = 0;
526
            op[1*8] = 0;
527
            op[2*8] = 0;
528
            op[3*8] = 0;
529
            op[4*8] = 0;
530
            op[5*8] = 0;
531
            op[6*8] = 0;
532
        }
533

  
534
        ip++;            /* next column */
535
        op++;
536
    }
537
}
538

  
539
/*
 * Inverse-transform one 8x8 block with vp3_idct_c() and store it as pixels.
 *
 * input_data     - passed through to vp3_idct_c()
 * dequant_matrix - passed through to vp3_idct_c()
 * dest           - top-left pixel of the 8x8 destination area
 * stride         - distance in bytes between the starts of successive
 *                  destination rows
 *
 * Each transformed value is biased by +128 and saturated to 0..255:
 * values below -128 become 0 and values above 127 become 255.
 */
void vp3_idct_put(int16_t *input_data, int16_t *dequant_matrix,
    uint8_t *dest, int stride)
{
    int16_t transformed_data[64];
    int row, col;

    vp3_idct_c(input_data, dequant_matrix, transformed_data);

    /* place in final output */
    for (row = 0; row < 8; row++) {
        const int16_t *src_row = transformed_data + row * 8;
        uint8_t *dst_row = dest + row * stride;

        for (col = 0; col < 8; col++) {
            const int16_t value = src_row[col];

            if (value < -128) {
                dst_row[col] = 0;
            } else if (value > 127) {
                dst_row[col] = 255;
            } else {
                dst_row[col] = (uint8_t)(value + 128);
            }
        }
    }
}
564

  
565
/*
 * Inverse-transform one 8x8 block with vp3_idct_c() and add it to the
 * pixels already present at dest.
 *
 * input_data     - passed through to vp3_idct_c()
 * dequant_matrix - passed through to vp3_idct_c()
 * dest           - top-left pixel of the 8x8 area to update in place
 * stride         - distance in bytes between the starts of successive
 *                  destination rows
 *
 * Each sum of existing pixel and transformed value is saturated to 0..255.
 */
void vp3_idct_add(int16_t *input_data, int16_t *dequant_matrix,
    uint8_t *dest, int stride)
{
    int16_t transformed_data[64];
    int16_t *op;
    int i, j;
    /*
     * Must be wider than int16_t: pixel (up to 255) plus residual (up to
     * 32767) can reach 33022, which does not fit in 16 bits.  Narrowing it
     * would typically wrap to a negative number and clamp to 0 instead of
     * 255.
     */
    int sample;

    vp3_idct_c(input_data, dequant_matrix, transformed_data);

    /* place in final output */
    op = transformed_data;
    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            sample = *dest + *op;
            if (sample < 0)
                *dest = 0;
            else if (sample > 255)
                *dest = 255;
            else
                *dest = (uint8_t)sample;
            op++;
            dest++;
        }
        /* 8 pixels were consumed above; skip to the start of the next row */
        dest += (stride - 8);
    }
}
592

  
593
/************************************************************************
291 594
 * VP3 specific functions
292 595
 ************************************************************************/
293 596

  
......
843 1146
     *
844 1147
     * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL.
845 1148
     */
846
#define SCALER 1
1149
#define SCALER 4
847 1150

  
848 1151
    /* scale DC quantizers */
849 1152
    s->intra_y_dequant[0] = vp31_intra_y_dequant[0] * dc_scale_factor / 100;
......
1423 1726
    int current_fragment;
1424 1727

  
1425 1728
    debug_vp3("  vp3: unpacking motion vectors\n");
1426

  
1427 1729
    if (s->keyframe) {
1428 1730

  
1429 1731
        debug_vp3("    keyframe-- there are no motion vectors\n");
......
2030 2332
    int x, y;
2031 2333
    int m, n;
2032 2334
    int i = first_fragment;
2033
    int j;
2034 2335
    int16_t *dequantizer;
2035
    DCTELEM dequant_block[64];
2036
    DCTELEM dequant_block_permuted[64];
2037 2336
    unsigned char *output_plane;
2038 2337
    unsigned char *last_plane;
2039 2338
    unsigned char *golden_plane;
......
2122 2421
                     * to render the block */
2123 2422
                    if ((motion_source < upper_motion_limit) ||
2124 2423
                        (motion_source > lower_motion_limit)) {
2125
//                        printf ("  vp3: help! motion source (%d) out of range (%d..%d)\n",
2126
//                            motion_source, upper_motion_limit, lower_motion_limit);
2424
                        printf ("  vp3: help! motion source (%d) out of range (%d..%d), fragment %d\n",
2425
                            motion_source, upper_motion_limit, lower_motion_limit, i);
2127 2426
                        continue;
2128 2427
                    }
2129 2428
                }
......
2151 2450
                debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", 
2152 2451
                    i, s->all_fragments[i].coding_method, 
2153 2452
                    s->all_fragments[i].coeffs[0], dequantizer[0]);
2154
                for (j = 0; j < 64; j++)
2155
                    dequant_block[dezigzag_index[j]] =
2156
                        s->all_fragments[i].coeffs[j] *
2157
                        dequantizer[j];
2158
                for (j = 0; j < 64; j++)
2159
                    dequant_block_permuted[s->dsp.idct_permutation[j]] =
2160
                        dequant_block[j];
2161

  
2162
                debug_idct("dequantized block:\n");
2163
                for (m = 0; m < 8; m++) {
2164
                    for (n = 0; n < 8; n++) {
2165
                        debug_idct(" %5d", dequant_block[m * 8 + n]);
2166
                    }
2167
                    debug_idct("\n");
2168
                }
2169
                debug_idct("\n");
2170 2453

  
2171 2454
                /* invert DCT and place (or add) in final output */
2172

  
2173 2455
                if (s->all_fragments[i].coding_method == MODE_INTRA) {
2174
                    dequant_block_permuted[0] += 1024;
2175
                    s->dsp.idct_put(
2456
                    vp3_idct_put(s->all_fragments[i].coeffs, dequantizer,
2176 2457
                        output_plane + s->all_fragments[i].first_pixel,
2177
                        stride, dequant_block_permuted);
2458
                        stride);
2178 2459
                } else {
2179
                    s->dsp.idct_add(
2460
                    vp3_idct_add(s->all_fragments[i].coeffs, dequantizer,
2180 2461
                        output_plane + s->all_fragments[i].first_pixel,
2181
                        stride, dequant_block_permuted);
2462
                        stride);
2182 2463
                }
2183 2464

  
2184 2465
                debug_idct("block after idct_%s():\n",
......
2479 2760
    }
2480 2761

  
2481 2762
    reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
2482
    reverse_dc_prediction(s, s->u_fragment_start,
2483
        s->fragment_width / 2, s->fragment_height / 2);
2484
    reverse_dc_prediction(s, s->v_fragment_start,
2485
        s->fragment_width / 2, s->fragment_height / 2);
2486

  
2487 2763
    render_fragments(s, 0, s->width, s->height, 0);
2488
#if 1
2489
    render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1);
2490
    render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2);
2491
#else
2492
memset(s->current_frame.data[1], 0x80, s->width * s->height / 4);
2493
memset(s->current_frame.data[2], 0x80, s->width * s->height / 4);
2494
#endif
2764

  
2765
    if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
2766
        reverse_dc_prediction(s, s->u_fragment_start,
2767
            s->fragment_width / 2, s->fragment_height / 2);
2768
        reverse_dc_prediction(s, s->v_fragment_start,
2769
            s->fragment_width / 2, s->fragment_height / 2);
2770
        render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1);
2771
        render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2);
2772
    } else {
2773
        memset(s->current_frame.data[1], 0x80, s->width * s->height / 4);
2774
        memset(s->current_frame.data[2], 0x80, s->width * s->height / 4);
2775
    }
2495 2776

  
2496 2777
#if KEYFRAMES_ONLY
2497 2778
}

Also available in: Unified diff