Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp3dsp.c @ 79396ac6

History | View | Annotate | Download (8.86 KB)

1
/*
2
 * Copyright (C) 2004 the ffmpeg project
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
/**
20
 * @file vp3dsp.c
21
 * Standard C DSP-oriented functions cribbed from the original VP3 
22
 * source code.
23
 */
24

    
25
#include "common.h"
26
#include "avcodec.h"
27
#include "dsputil.h"
28

    
29
/* Rounding bias added to the column-pass results right before the final
 * ">> 4" (8 is half of 16, so the shift rounds to nearest). */
#define IdctAdjustBeforeShift 8
30
/* Fixed-point multipliers used by idct(); every product is followed by
 * ">> 16". Each value equals round(65536 * cos(k * pi / 16)) for the k
 * given after the 'C' in its name (k = 1..7). */
#define xC1S7 64277
31
#define xC2S6 60547
32
#define xC3S5 54491
33
#define xC4S4 46341
34
#define xC5S3 36410
35
#define xC6S2 25080
36
#define xC7S1 12785
37

    
38
static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
39
{
40
    int16_t *ip = input;
41
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
42

    
43
    int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
44
    int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
45
    int t1, t2;
46

    
47
    int i;
48
    
49
    /* Inverse DCT on the rows now */
50
    for (i = 0; i < 8; i++) {
51
        /* Check for non-zero values */
52
        if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
53
            t1 = (int32_t)(xC1S7 * ip[1]);
54
            t2 = (int32_t)(xC7S1 * ip[7]);
55
            t1 >>= 16;
56
            t2 >>= 16;
57
            A_ = t1 + t2;
58

    
59
            t1 = (int32_t)(xC7S1 * ip[1]);
60
            t2 = (int32_t)(xC1S7 * ip[7]);
61
            t1 >>= 16;
62
            t2 >>= 16;
63
            B_ = t1 - t2;
64

    
65
            t1 = (int32_t)(xC3S5 * ip[3]);
66
            t2 = (int32_t)(xC5S3 * ip[5]);
67
            t1 >>= 16;
68
            t2 >>= 16;
69
            C_ = t1 + t2;
70

    
71
            t1 = (int32_t)(xC3S5 * ip[5]);
72
            t2 = (int32_t)(xC5S3 * ip[3]);
73
            t1 >>= 16;
74
            t2 >>= 16;
75
            D_ = t1 - t2;
76

    
77

    
78
            t1 = (int32_t)(xC4S4 * (A_ - C_));
79
            t1 >>= 16;
80
            _Ad = t1;
81

    
82
            t1 = (int32_t)(xC4S4 * (B_ - D_));
83
            t1 >>= 16;
84
            _Bd = t1;
85

    
86

    
87
            _Cd = A_ + C_;
88
            _Dd = B_ + D_;
89

    
90
            t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
91
            t1 >>= 16;
92
            E_ = t1;
93

    
94
            t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
95
            t1 >>= 16;
96
            F_ = t1;
97

    
98
            t1 = (int32_t)(xC2S6 * ip[2]);
99
            t2 = (int32_t)(xC6S2 * ip[6]);
100
            t1 >>= 16;
101
            t2 >>= 16;
102
            G_ = t1 + t2;
103

    
104
            t1 = (int32_t)(xC6S2 * ip[2]);
105
            t2 = (int32_t)(xC2S6 * ip[6]);
106
            t1 >>= 16;
107
            t2 >>= 16;
108
            H_ = t1 - t2;
109

    
110

    
111
            _Ed = E_ - G_;
112
            _Gd = E_ + G_;
113

    
114
            _Add = F_ + _Ad;
115
            _Bdd = _Bd - H_;
116

    
117
            _Fd = F_ - _Ad;
118
            _Hd = _Bd + H_;
119

    
120
            /*  Final sequence of operations over-write original inputs. */
121
            ip[0] = _Gd + _Cd ;
122
            ip[7] = _Gd - _Cd ;
123

    
124
            ip[1] = _Add + _Hd;
125
            ip[2] = _Add - _Hd;
126

    
127
            ip[3] = _Ed + _Dd ;
128
            ip[4] = _Ed - _Dd ;
129

    
130
            ip[5] = _Fd + _Bdd;
131
            ip[6] = _Fd - _Bdd;
132

    
133
        }
134

    
135
        ip += 8;            /* next row */
136
    }
137
    
138
    ip = input;
139

    
140
    for ( i = 0; i < 8; i++) {
141
        /* Check for non-zero values (bitwise or faster than ||) */
142
        if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
143
             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
144

    
145
            t1 = (int32_t)(xC1S7 * ip[1*8]);
146
            t2 = (int32_t)(xC7S1 * ip[7*8]);
147
            t1 >>= 16;
148
            t2 >>= 16;
149
            A_ = t1 + t2;
150

    
151
            t1 = (int32_t)(xC7S1 * ip[1*8]);
152
            t2 = (int32_t)(xC1S7 * ip[7*8]);
153
            t1 >>= 16;
154
            t2 >>= 16;
155
            B_ = t1 - t2;
156

    
157
            t1 = (int32_t)(xC3S5 * ip[3*8]);
158
            t2 = (int32_t)(xC5S3 * ip[5*8]);
159
            t1 >>= 16;
160
            t2 >>= 16;
161
            C_ = t1 + t2;
162

    
163
            t1 = (int32_t)(xC3S5 * ip[5*8]);
164
            t2 = (int32_t)(xC5S3 * ip[3*8]);
165
            t1 >>= 16;
166
            t2 >>= 16;
167
            D_ = t1 - t2;
168

    
169

    
170
            t1 = (int32_t)(xC4S4 * (A_ - C_));
171
            t1 >>= 16;
172
            _Ad = t1;
173

    
174
            t1 = (int32_t)(xC4S4 * (B_ - D_));
175
            t1 >>= 16;
176
            _Bd = t1;
177

    
178

    
179
            _Cd = A_ + C_;
180
            _Dd = B_ + D_;
181

    
182
            t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
183
            t1 >>= 16;
184
            E_ = t1;
185

    
186
            t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
187
            t1 >>= 16;
188
            F_ = t1;
189

    
190
            t1 = (int32_t)(xC2S6 * ip[2*8]);
191
            t2 = (int32_t)(xC6S2 * ip[6*8]);
192
            t1 >>= 16;
193
            t2 >>= 16;
194
            G_ = t1 + t2;
195

    
196
            t1 = (int32_t)(xC6S2 * ip[2*8]);
197
            t2 = (int32_t)(xC2S6 * ip[6*8]);
198
            t1 >>= 16;
199
            t2 >>= 16;
200
            H_ = t1 - t2;
201

    
202

    
203
            _Ed = E_ - G_;
204
            _Gd = E_ + G_;
205

    
206
            _Add = F_ + _Ad;
207
            _Bdd = _Bd - H_;
208

    
209
            _Fd = F_ - _Ad;
210
            _Hd = _Bd + H_;
211

    
212
            if(type==1){  //HACK
213
                _Gd += 16*128;
214
                _Add+= 16*128;
215
                _Ed += 16*128;
216
                _Fd += 16*128;
217
            }
218
            _Gd += IdctAdjustBeforeShift;
219
            _Add += IdctAdjustBeforeShift;
220
            _Ed += IdctAdjustBeforeShift;
221
            _Fd += IdctAdjustBeforeShift;
222

    
223
            /* Final sequence of operations over-write original inputs. */
224
            if(type==0){
225
                ip[0*8] = (_Gd + _Cd )  >> 4;
226
                ip[7*8] = (_Gd - _Cd )  >> 4;
227
    
228
                ip[1*8] = (_Add + _Hd ) >> 4;
229
                ip[2*8] = (_Add - _Hd ) >> 4;
230
    
231
                ip[3*8] = (_Ed + _Dd )  >> 4;
232
                ip[4*8] = (_Ed - _Dd )  >> 4;
233
    
234
                ip[5*8] = (_Fd + _Bdd ) >> 4;
235
                ip[6*8] = (_Fd - _Bdd ) >> 4;
236
            }else if(type==1){
237
                dst[0*stride] = cm[(_Gd + _Cd )  >> 4];
238
                dst[7*stride] = cm[(_Gd - _Cd )  >> 4];
239
    
240
                dst[1*stride] = cm[(_Add + _Hd ) >> 4];
241
                dst[2*stride] = cm[(_Add - _Hd ) >> 4];
242
    
243
                dst[3*stride] = cm[(_Ed + _Dd )  >> 4];
244
                dst[4*stride] = cm[(_Ed - _Dd )  >> 4];
245
    
246
                dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
247
                dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
248
            }else{
249
                dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd )  >> 4)];
250
                dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd )  >> 4)];
251
    
252
                dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
253
                dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
254
    
255
                dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd )  >> 4)];
256
                dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd )  >> 4)];
257
    
258
                dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
259
                dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
260
            }
261

    
262
        } else {
263
            if(type==0){
264
                ip[0*8] = 
265
                ip[1*8] = 
266
                ip[2*8] = 
267
                ip[3*8] = 
268
                ip[4*8] = 
269
                ip[5*8] = 
270
                ip[6*8] =
271
                ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
272
            }else if(type==1){
273
                dst[0*stride]=
274
                dst[1*stride]=
275
                dst[2*stride]=
276
                dst[3*stride]=
277
                dst[4*stride]=
278
                dst[5*stride]=
279
                dst[6*stride]=
280
                dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
281
            }else{
282
                if(ip[0*8]){
283
                    int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
284
                    dst[0*stride] = cm[dst[0*stride] + v];
285
                    dst[1*stride] = cm[dst[1*stride] + v];
286
                    dst[2*stride] = cm[dst[2*stride] + v];
287
                    dst[3*stride] = cm[dst[3*stride] + v];
288
                    dst[4*stride] = cm[dst[4*stride] + v];
289
                    dst[5*stride] = cm[dst[5*stride] + v];
290
                    dst[6*stride] = cm[dst[6*stride] + v];
291
                    dst[7*stride] = cm[dst[7*stride] + v];
292
                }
293
            }
294
        }
295

    
296
        ip++;            /* next column */
297
        dst++;
298
    }
299
}
300

    
301
/**
 * In-place VP3 inverse DCT.
 *
 * Runs idct() with type 0 and no destination picture, so the result is
 * written back into the coefficient buffer.
 *
 * @param block 64 coefficients, overwritten with the result (align 16)
 */
void ff_vp3_idct_c(DCTELEM *block)
{
    idct(NULL, 0, block, 0);
}
304
    
305
/**
 * VP3 inverse DCT that stores its output into a picture.
 *
 * Runs idct() with type 1: the transformed block, biased by 128, is
 * written to dest.
 *
 * @param dest      top-left destination pixel (align 8)
 * @param line_size distance in bytes between two destination rows
 * @param block     64 input coefficients (align 16); modified by the
 *                  transform's row pass
 */
void ff_vp3_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    idct(dest, line_size, block, 1);
}
308

    
309
/**
 * VP3 inverse DCT that accumulates its output onto a picture.
 *
 * Runs idct() with type 2: the transformed block is added to the pixels
 * already in dest and the sums are clamped through the crop table.
 *
 * @param dest      top-left destination pixel (align 8)
 * @param line_size distance in bytes between two destination rows
 * @param block     64 input coefficients (align 16); modified by the
 *                  transform's row pass
 */
void ff_vp3_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    idct(dest, line_size, block, 2);
}