Revision 059715a4

View differences:

libavcodec/dsputil.c
30 30
#include "mpegvideo.h"
31 31
#include "simple_idct.h"
32 32
#include "faandct.h"
33
#include "snow.h"
33 34

  
34 35
/* snow.c */
35 36
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count);
......
4047 4048
    c->try_8x8basis= try_8x8basis_c;
4048 4049
    c->add_8x8basis= add_8x8basis_c;
4049 4050

  
4051
    c->vertical_compose97i = ff_snow_vertical_compose97i;
4052
    c->horizontal_compose97i = ff_snow_horizontal_compose97i;
4053
    c->inner_add_yblock = ff_snow_inner_add_yblock;
4054

  
4050 4055
#ifdef HAVE_MMX
4051 4056
    dsputil_init_mmx(c, avctx);
4052 4057
#endif
libavcodec/dsputil.h
35 35
//#define DEBUG
36 36
/* dct code */
37 37
typedef short DCTELEM;
38
typedef int DWTELEM;
38 39

  
39 40
void fdct_ifast (DCTELEM *data);
40 41
void fdct_ifast248 (DCTELEM *data);
......
133 134
typedef int (*me_cmp_func)(void /*MpegEncContext*/ *s, uint8_t *blk1/*align width (8 or 16)*/, uint8_t *blk2/*align 1*/, int line_size, int h)/* __attribute__ ((const))*/;
134 135

  
135 136

  
137
// for snow slices
138
typedef struct slice_buffer_s slice_buffer;
139

  
136 140
/**
137 141
 * DSPContext.
138 142
 */
......
334 338
    void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
335 339
    void (*h264_idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
336 340
    void (*h264_idct8_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
341

  
342
    /* snow wavelet */
343
    void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
344
    void (*horizontal_compose97i)(DWTELEM *b, int width);
345
    void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
337 346
} DSPContext;
338 347

  
339 348
void dsputil_static_init(void);
libavcodec/i386/mmx.h
12 12
#  define REG_d "rdx"
13 13
#  define REG_D "rdi"
14 14
#  define REG_S "rsi"
15
#  define PTR_SIZE "8"
15 16
#else
16 17
#  define REG_a "eax"
17 18
#  define REG_b "ebx"
......
19 20
#  define REG_d "edx"
20 21
#  define REG_D "edi"
21 22
#  define REG_S "esi"
23
#  define PTR_SIZE "4"
22 24
#endif
23 25

  
24 26
/*
libavcodec/snow.c
19 19
#include "avcodec.h"
20 20
#include "common.h"
21 21
#include "dsputil.h"
22
#include "snow.h"
22 23

  
23 24
#include "rangecoder.h"
24
#define MID_STATE 128
25 25

  
26 26
#include "mpegvideo.h"
27 27

  
28 28
#undef NDEBUG
29 29
#include <assert.h>
30 30

  
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

  
39 31
static const int8_t quant3[256]={
40 32
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 33
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
......
181 173
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182 174
};
183 175

  
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186 176
#if 0 //64*cubic
187 177
static const uint8_t obmc32[1024]={
188 178
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
......
425 415
    SubBand band[MAX_DECOMPOSITIONS][4];
426 416
}Plane;
427 417

  
428
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
429
typedef struct {
430
    DWTELEM * * line; ///< For use by idwt and predict_slices.
431
    DWTELEM * * data_stack; ///< Used for internal purposes.
432
    int data_stack_top;
433
    int line_count;
434
    int line_width;
435
    int data_count;
436
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
437
} slice_buffer;
438

  
439 418
typedef struct SnowContext{
440 419
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
441 420

  
......
741 720
    }
742 721
}
743 722

  
723
#ifndef lift5
744 724
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745 725
    const int mirror_left= !highpass;
746 726
    const int mirror_right= (width&1) ^ highpass;
......
770 750
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
771 751
    }
772 752
}
753
#endif
773 754

  
755
#ifndef liftS
774 756
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775 757
    const int mirror_left= !highpass;
776 758
    const int mirror_right= (width&1) ^ highpass;
......
793 775
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
794 776
    }
795 777
}
778
#endif
796 779

  
797 780

  
798 781
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
......
1111 1094
    }
1112 1095
}
1113 1096

  
1114
#define liftS lift
1115
#define lift5 lift
1116
#if 1
1117
#define W_AM 3
1118
#define W_AO 0
1119
#define W_AS 1
1120

  
1121
#undef liftS
1122
#define W_BM 1
1123
#define W_BO 8
1124
#define W_BS 4
1125

  
1126
#define W_CM 1
1127
#define W_CO 0
1128
#define W_CS 0
1129

  
1130
#define W_DM 3
1131
#define W_DO 4
1132
#define W_DS 3
1133
#elif 0
1134
#define W_AM 55
1135
#define W_AO 16
1136
#define W_AS 5
1137

  
1138
#define W_BM 3
1139
#define W_BO 32
1140
#define W_BS 6
1141

  
1142
#define W_CM 127
1143
#define W_CO 64
1144
#define W_CS 7
1145

  
1146
#define W_DM 7
1147
#define W_DO 8
1148
#define W_DS 4
1149
#elif 0
1150
#define W_AM 97
1151
#define W_AO 32
1152
#define W_AS 6
1153

  
1154
#define W_BM 63
1155
#define W_BO 512
1156
#define W_BS 10
1157

  
1158
#define W_CM 13
1159
#define W_CO 8
1160
#define W_CS 4
1161

  
1162
#define W_DM 15
1163
#define W_DO 16
1164
#define W_DS 5
1165

  
1166
#else
1167

  
1168
#define W_AM 203
1169
#define W_AO 64
1170
#define W_AS 7
1171

  
1172
#define W_BM 217
1173
#define W_BO 2048
1174
#define W_BS 12
1175

  
1176
#define W_CM 113
1177
#define W_CO 64
1178
#define W_CS 7
1179

  
1180
#define W_DM 227
1181
#define W_DO 128
1182
#define W_DS 9
1183
#endif
1184 1097
static void horizontal_decompose97i(DWTELEM *b, int width){
1185 1098
    DWTELEM temp[width];
1186 1099
    const int w2= (width+1)>>1;
......
1410 1323
}
1411 1324

  
1412 1325

  
1413
static void horizontal_compose97i(DWTELEM *b, int width){
1326
void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1414 1327
    DWTELEM temp[width];
1415 1328
    const int w2= (width+1)>>1;
1416 1329

  
......
1463 1376
    }
1464 1377
}
1465 1378

  
1466
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1379
void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1467 1380
    int i;
1468 1381

  
1469 1382
    for(i=0; i<width; i++){
......
1504 1417
    cs->y = -3;
1505 1418
}
1506 1419

  
1507
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1420
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1508 1421
    int y = cs->y;
1509 1422

  
1510 1423
    DWTELEM *b0= cs->b0;
......
1516 1429

  
1517 1430
{START_TIMER
1518 1431
    if(y>0 && y+4<height){
1519
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1432
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1520 1433
    }else{
1521 1434
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522 1435
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
......
1527 1440
STOP_TIMER("vertical_compose97i")}}
1528 1441

  
1529 1442
{START_TIMER
1530
        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531
        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1443
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1444
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1532 1445
if(width>400 && y+0<(unsigned)height){
1533 1446
STOP_TIMER("horizontal_compose97i")}}
1534 1447

  
......
1557 1470
STOP_TIMER("vertical_compose97i")}}
1558 1471

  
1559 1472
{START_TIMER
1560
        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561
        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1473
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1474
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1562 1475
if(width>400 && b0 <= b2){
1563 1476
STOP_TIMER("horizontal_compose97i")}}
1564 1477

  
......
1619 1532
    }
1620 1533
}
1621 1534

  
1622
static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1535
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623 1536
    const int support = type==1 ? 3 : 5;
1624 1537
    int level;
1625 1538
    if(type==2) return;
......
1627 1540
    for(level=decomposition_count-1; level>=0; level--){
1628 1541
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1629 1542
            switch(type){
1630
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1543
            case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1631 1544
                    break;
1632 1545
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1633 1546
                    break;
......
2545 2458
    }
2546 2459
}
2547 2460

  
2461
void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2462
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2463
    int y, x;
2464
    DWTELEM * dst;
2465
    for(y=0; y<b_h; y++){
2466
        //FIXME ugly misuse of obmc_stride
2467
        uint8_t *obmc1= obmc + y*obmc_stride;
2468
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2469
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2470
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2471
        dst = slice_buffer_get_line(sb, src_y + y);
2472
        for(x=0; x<b_w; x++){
2473
            int v=   obmc1[x] * block[3][x + y*src_stride]
2474
                    +obmc2[x] * block[2][x + y*src_stride]
2475
                    +obmc3[x] * block[1][x + y*src_stride]
2476
                    +obmc4[x] * block[0][x + y*src_stride];
2477

  
2478
            v <<= 8 - LOG2_OBMC_MAX;
2479
            if(FRAC_BITS != 8){
2480
                v += 1<<(7 - FRAC_BITS);
2481
                v >>= 8 - FRAC_BITS;
2482
            }
2483
            if(add){
2484
                v += dst[x + src_x];
2485
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2486
                if(v&(~255)) v= ~(v>>31);
2487
                dst8[x + y*src_stride] = v;
2488
            }else{
2489
                dst[x + src_x] -= v;
2490
            }
2491
        }
2492
    }
2493
}
2494

  
2548 2495
//FIXME name cleanup (b_w, block_w, b_width stuff)
2549 2496
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2550 2497
    DWTELEM * dst = NULL;
......
2669 2616

  
2670 2617
    START_TIMER
2671 2618

  
2672
    for(y=0; y<b_h; y++){
2673
        //FIXME ugly misuse of obmc_stride
2674
        uint8_t *obmc1= obmc + y*obmc_stride;
2675
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2676
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2677
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2678
        dst = slice_buffer_get_line(sb, src_y + y);
2679
        for(x=0; x<b_w; x++){
2680
            int v=   obmc1[x] * block[3][x + y*src_stride]
2681
                    +obmc2[x] * block[2][x + y*src_stride]
2682
                    +obmc3[x] * block[1][x + y*src_stride]
2683
                    +obmc4[x] * block[0][x + y*src_stride];
2684

  
2685
            v <<= 8 - LOG2_OBMC_MAX;
2686
            if(FRAC_BITS != 8){
2687
                v += 1<<(7 - FRAC_BITS);
2688
                v >>= 8 - FRAC_BITS;
2689
            }
2690
            if(add){
2691
//                v += old_dst[x + y*dst_stride];
2692
                v += dst[x + src_x];
2693
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2694
                if(v&(~255)) v= ~(v>>31);
2695
                dst8[x + y*src_stride] = v;
2696
            }else{
2697
//                old_dst[x + y*dst_stride] -= v;
2698
                dst[x + src_x] -= v;
2699
            }
2700
        }
2701
    }
2619
    s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2702 2620
        STOP_TIMER("Inner add y block")
2703 2621
}
2704 2622
#endif
......
4399 4317

  
4400 4318
{   START_TIMER
4401 4319
        for(; yd<slice_h; yd+=4){
4402
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4320
            ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4403 4321
        }
4404 4322
    STOP_TIMER("idwt slice");}
4405 4323

  
libavcodec/snow.h
1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 * Copyright (C) 2006 Robert Edele <yartrebo@earthlink.net>
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18
 */
19

  
20
#ifndef _SNOW_H
21
#define _SNOW_H
22

  
23
#include "dsputil.h"
24

  
25
#define MID_STATE 128
26

  
27
#define MAX_DECOMPOSITIONS 8
28
#define MAX_PLANES 4
29
#define QSHIFT 5
30
#define QROOT (1<<QSHIFT)
31
#define LOSSLESS_QLOG -128
32
#define FRAC_BITS 8
33

  
34
#define LOG2_OBMC_MAX 6
35
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
36

  
37
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
38
struct slice_buffer_s {
39
    DWTELEM * * line; ///< For use by idwt and predict_slices.
40
    DWTELEM * * data_stack; ///< Used for internal purposes.
41
    int data_stack_top;
42
    int line_count;
43
    int line_width;
44
    int data_count;
45
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
46
};
47

  
48
#define liftS lift
49
#define lift5 lift
50
#if 1
51
#define W_AM 3
52
#define W_AO 0
53
#define W_AS 1
54

  
55
#undef liftS
56
#define W_BM 1
57
#define W_BO 8
58
#define W_BS 4
59

  
60
#define W_CM 1
61
#define W_CO 0
62
#define W_CS 0
63

  
64
#define W_DM 3
65
#define W_DO 4
66
#define W_DS 3
67
#elif 0
68
#define W_AM 55
69
#define W_AO 16
70
#define W_AS 5
71

  
72
#define W_BM 3
73
#define W_BO 32
74
#define W_BS 6
75

  
76
#define W_CM 127
77
#define W_CO 64
78
#define W_CS 7
79

  
80
#define W_DM 7
81
#define W_DO 8
82
#define W_DS 4
83
#elif 0
84
#define W_AM 97
85
#define W_AO 32
86
#define W_AS 6
87

  
88
#define W_BM 63
89
#define W_BO 512
90
#define W_BS 10
91

  
92
#define W_CM 13
93
#define W_CO 8
94
#define W_CS 4
95

  
96
#define W_DM 15
97
#define W_DO 16
98
#define W_DS 5
99

  
100
#else
101

  
102
#define W_AM 203
103
#define W_AO 64
104
#define W_AS 7
105

  
106
#define W_BM 217
107
#define W_BO 2048
108
#define W_BS 12
109

  
110
#define W_CM 113
111
#define W_CO 64
112
#define W_CS 7
113

  
114
#define W_DM 227
115
#define W_DO 128
116
#define W_DS 9
117
#endif
118

  
119
extern void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width);
120
extern void ff_snow_horizontal_compose97i(DWTELEM *b, int width);
121
extern void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8);
122

  
123
#endif

Also available in: Unified diff