Statistics
| Branch: | Revision:

ffmpeg / libavcodec / vp3dsp.c @ 71160c55

History | View | Annotate | Download (8.8 KB)

1 44cb64ee Mike Melanson
/*
2
 * Copyright (C) 2004 the ffmpeg project
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18
19
/**
20
 * @file vp3dsp.c
21 115329f1 Diego Biurrun
 * Standard C DSP-oriented functions cribbed from the original VP3
22 44cb64ee Mike Melanson
 * source code.
23
 */
24
25
#include "common.h"
26
#include "avcodec.h"
27 e96682e6 Michael Niedermayer
#include "dsputil.h"
28 44cb64ee Mike Melanson
29
/* Rounding bias added before the final ">> 4" of the column pass (half of 16). */
#define IdctAdjustBeforeShift 8

/*
 * 16.16 fixed-point DCT basis constants.
 * xCnSm == round(65536 * cos(n * pi / 16)) == round(65536 * sin(m * pi / 16)).
 * Products with these constants are shifted right by 16 to drop the scale.
 */
#define xC1S7 64277     /* cos(1*pi/16) */
#define xC2S6 60547     /* cos(2*pi/16) */
#define xC3S5 54491     /* cos(3*pi/16) */
#define xC4S4 46341     /* cos(4*pi/16) == 1/sqrt(2) */
#define xC5S3 36410     /* cos(5*pi/16) */
#define xC6S2 25080     /* cos(6*pi/16) */
#define xC7S1 12785     /* cos(7*pi/16) */
37
38 8b6103da Michael Niedermayer
static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int type)
39 44cb64ee Mike Melanson
{
40 8b6103da Michael Niedermayer
    int16_t *ip = input;
41
    uint8_t *cm = cropTbl + MAX_NEG_CROP;
42 44cb64ee Mike Melanson
43 8b6103da Michael Niedermayer
    int A_, B_, C_, D_, _Ad, _Bd, _Cd, _Dd, E_, F_, G_, H_;
44
    int _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
45
    int t1, t2;
46 44cb64ee Mike Melanson
47 bf4e3bd2 Måns Rullgård
    int i;
48 115329f1 Diego Biurrun
49 44cb64ee Mike Melanson
    /* Inverse DCT on the rows now */
50
    for (i = 0; i < 8; i++) {
51
        /* Check for non-zero values */
52
        if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
53
            t1 = (int32_t)(xC1S7 * ip[1]);
54
            t2 = (int32_t)(xC7S1 * ip[7]);
55
            t1 >>= 16;
56
            t2 >>= 16;
57
            A_ = t1 + t2;
58
59
            t1 = (int32_t)(xC7S1 * ip[1]);
60
            t2 = (int32_t)(xC1S7 * ip[7]);
61
            t1 >>= 16;
62
            t2 >>= 16;
63
            B_ = t1 - t2;
64
65
            t1 = (int32_t)(xC3S5 * ip[3]);
66
            t2 = (int32_t)(xC5S3 * ip[5]);
67
            t1 >>= 16;
68
            t2 >>= 16;
69
            C_ = t1 + t2;
70
71
            t1 = (int32_t)(xC3S5 * ip[5]);
72
            t2 = (int32_t)(xC5S3 * ip[3]);
73
            t1 >>= 16;
74
            t2 >>= 16;
75
            D_ = t1 - t2;
76
77
78
            t1 = (int32_t)(xC4S4 * (A_ - C_));
79
            t1 >>= 16;
80
            _Ad = t1;
81
82
            t1 = (int32_t)(xC4S4 * (B_ - D_));
83
            t1 >>= 16;
84
            _Bd = t1;
85
86
87
            _Cd = A_ + C_;
88
            _Dd = B_ + D_;
89
90
            t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
91
            t1 >>= 16;
92
            E_ = t1;
93
94
            t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
95
            t1 >>= 16;
96
            F_ = t1;
97
98
            t1 = (int32_t)(xC2S6 * ip[2]);
99
            t2 = (int32_t)(xC6S2 * ip[6]);
100
            t1 >>= 16;
101
            t2 >>= 16;
102
            G_ = t1 + t2;
103
104
            t1 = (int32_t)(xC6S2 * ip[2]);
105
            t2 = (int32_t)(xC2S6 * ip[6]);
106
            t1 >>= 16;
107
            t2 >>= 16;
108
            H_ = t1 - t2;
109
110
111
            _Ed = E_ - G_;
112
            _Gd = E_ + G_;
113
114
            _Add = F_ + _Ad;
115
            _Bdd = _Bd - H_;
116
117
            _Fd = F_ - _Ad;
118
            _Hd = _Bd + H_;
119
120
            /*  Final sequence of operations over-write original inputs. */
121 8b6103da Michael Niedermayer
            ip[0] = _Gd + _Cd ;
122
            ip[7] = _Gd - _Cd ;
123 44cb64ee Mike Melanson
124 8b6103da Michael Niedermayer
            ip[1] = _Add + _Hd;
125
            ip[2] = _Add - _Hd;
126 44cb64ee Mike Melanson
127 8b6103da Michael Niedermayer
            ip[3] = _Ed + _Dd ;
128
            ip[4] = _Ed - _Dd ;
129 44cb64ee Mike Melanson
130 8b6103da Michael Niedermayer
            ip[5] = _Fd + _Bdd;
131
            ip[6] = _Fd - _Bdd;
132 44cb64ee Mike Melanson
133
        }
134
135
        ip += 8;            /* next row */
136
    }
137 115329f1 Diego Biurrun
138 8b6103da Michael Niedermayer
    ip = input;
139 44cb64ee Mike Melanson
140
    for ( i = 0; i < 8; i++) {
141
        /* Check for non-zero values (bitwise or faster than ||) */
142 8b6103da Michael Niedermayer
        if ( ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
143 44cb64ee Mike Melanson
             ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
144
145
            t1 = (int32_t)(xC1S7 * ip[1*8]);
146
            t2 = (int32_t)(xC7S1 * ip[7*8]);
147
            t1 >>= 16;
148
            t2 >>= 16;
149
            A_ = t1 + t2;
150
151
            t1 = (int32_t)(xC7S1 * ip[1*8]);
152
            t2 = (int32_t)(xC1S7 * ip[7*8]);
153
            t1 >>= 16;
154
            t2 >>= 16;
155
            B_ = t1 - t2;
156
157
            t1 = (int32_t)(xC3S5 * ip[3*8]);
158
            t2 = (int32_t)(xC5S3 * ip[5*8]);
159
            t1 >>= 16;
160
            t2 >>= 16;
161
            C_ = t1 + t2;
162
163
            t1 = (int32_t)(xC3S5 * ip[5*8]);
164
            t2 = (int32_t)(xC5S3 * ip[3*8]);
165
            t1 >>= 16;
166
            t2 >>= 16;
167
            D_ = t1 - t2;
168
169
170
            t1 = (int32_t)(xC4S4 * (A_ - C_));
171
            t1 >>= 16;
172
            _Ad = t1;
173
174
            t1 = (int32_t)(xC4S4 * (B_ - D_));
175
            t1 >>= 16;
176
            _Bd = t1;
177
178
179
            _Cd = A_ + C_;
180
            _Dd = B_ + D_;
181
182
            t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
183
            t1 >>= 16;
184
            E_ = t1;
185
186
            t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
187
            t1 >>= 16;
188
            F_ = t1;
189
190
            t1 = (int32_t)(xC2S6 * ip[2*8]);
191
            t2 = (int32_t)(xC6S2 * ip[6*8]);
192
            t1 >>= 16;
193
            t2 >>= 16;
194
            G_ = t1 + t2;
195
196
            t1 = (int32_t)(xC6S2 * ip[2*8]);
197
            t2 = (int32_t)(xC2S6 * ip[6*8]);
198
            t1 >>= 16;
199
            t2 >>= 16;
200
            H_ = t1 - t2;
201
202
203
            _Ed = E_ - G_;
204
            _Gd = E_ + G_;
205
206
            _Add = F_ + _Ad;
207
            _Bdd = _Bd - H_;
208
209
            _Fd = F_ - _Ad;
210
            _Hd = _Bd + H_;
211
212 8b6103da Michael Niedermayer
            if(type==1){  //HACK
213
                _Gd += 16*128;
214
                _Add+= 16*128;
215
                _Ed += 16*128;
216
                _Fd += 16*128;
217
            }
218 44cb64ee Mike Melanson
            _Gd += IdctAdjustBeforeShift;
219
            _Add += IdctAdjustBeforeShift;
220
            _Ed += IdctAdjustBeforeShift;
221
            _Fd += IdctAdjustBeforeShift;
222
223
            /* Final sequence of operations over-write original inputs. */
224 8b6103da Michael Niedermayer
            if(type==0){
225
                ip[0*8] = (_Gd + _Cd )  >> 4;
226
                ip[7*8] = (_Gd - _Cd )  >> 4;
227 115329f1 Diego Biurrun
228 8b6103da Michael Niedermayer
                ip[1*8] = (_Add + _Hd ) >> 4;
229
                ip[2*8] = (_Add - _Hd ) >> 4;
230 115329f1 Diego Biurrun
231 8b6103da Michael Niedermayer
                ip[3*8] = (_Ed + _Dd )  >> 4;
232
                ip[4*8] = (_Ed - _Dd )  >> 4;
233 115329f1 Diego Biurrun
234 8b6103da Michael Niedermayer
                ip[5*8] = (_Fd + _Bdd ) >> 4;
235
                ip[6*8] = (_Fd - _Bdd ) >> 4;
236
            }else if(type==1){
237
                dst[0*stride] = cm[(_Gd + _Cd )  >> 4];
238
                dst[7*stride] = cm[(_Gd - _Cd )  >> 4];
239 115329f1 Diego Biurrun
240 8b6103da Michael Niedermayer
                dst[1*stride] = cm[(_Add + _Hd ) >> 4];
241
                dst[2*stride] = cm[(_Add - _Hd ) >> 4];
242 115329f1 Diego Biurrun
243 8b6103da Michael Niedermayer
                dst[3*stride] = cm[(_Ed + _Dd )  >> 4];
244
                dst[4*stride] = cm[(_Ed - _Dd )  >> 4];
245 115329f1 Diego Biurrun
246 8b6103da Michael Niedermayer
                dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
247
                dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
248
            }else{
249
                dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd )  >> 4)];
250
                dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd )  >> 4)];
251 115329f1 Diego Biurrun
252 8b6103da Michael Niedermayer
                dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
253
                dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
254 115329f1 Diego Biurrun
255 8b6103da Michael Niedermayer
                dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd )  >> 4)];
256
                dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd )  >> 4)];
257 115329f1 Diego Biurrun
258 8b6103da Michael Niedermayer
                dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
259
                dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
260
            }
261 44cb64ee Mike Melanson
262
        } else {
263 8b6103da Michael Niedermayer
            if(type==0){
264 115329f1 Diego Biurrun
                ip[0*8] =
265
                ip[1*8] =
266
                ip[2*8] =
267
                ip[3*8] =
268
                ip[4*8] =
269
                ip[5*8] =
270 8b6103da Michael Niedermayer
                ip[6*8] =
271
                ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
272
            }else if(type==1){
273
                dst[0*stride]=
274
                dst[1*stride]=
275
                dst[2*stride]=
276
                dst[3*stride]=
277
                dst[4*stride]=
278
                dst[5*stride]=
279
                dst[6*stride]=
280
                dst[7*stride]= 128 + ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
281
            }else{
282
                if(ip[0*8]){
283
                    int v= ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
284
                    dst[0*stride] = cm[dst[0*stride] + v];
285
                    dst[1*stride] = cm[dst[1*stride] + v];
286
                    dst[2*stride] = cm[dst[2*stride] + v];
287
                    dst[3*stride] = cm[dst[3*stride] + v];
288
                    dst[4*stride] = cm[dst[4*stride] + v];
289
                    dst[5*stride] = cm[dst[5*stride] + v];
290
                    dst[6*stride] = cm[dst[6*stride] + v];
291
                    dst[7*stride] = cm[dst[7*stride] + v];
292
                }
293
            }
294 44cb64ee Mike Melanson
        }
295
296
        ip++;            /* next column */
297 8b6103da Michael Niedermayer
        dst++;
298 44cb64ee Mike Melanson
    }
299
}
300 8b6103da Michael Niedermayer
301
/**
 * In-place VP3 inverse DCT.
 *
 * Forwards to idct() in mode 0 with no destination buffer, so the
 * transformed values are written back into @p block.
 *
 * @param block 64 coefficients; annotated "align 16" by the original author
 */
void ff_vp3_idct_c(DCTELEM *block /* align 16 */)
{
    idct(NULL, 0, block, 0);
}
304 115329f1 Diego Biurrun
305 8b6103da Michael Niedermayer
/**
 * VP3 inverse DCT that stores its result as pixels.
 *
 * Forwards to idct() in mode 1, which writes the transformed block,
 * biased by 128, into the 8x8 area starting at @p dest.
 *
 * @param dest      top-left output byte; annotated "align 8" by the
 *                  original author
 * @param line_size distance in bytes between two output rows
 * @param block     64 coefficients; annotated "align 16"; overwritten by
 *                  the row pass of idct()
 */
void ff_vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size,
                       DCTELEM *block /* align 16 */)
{
    idct(dest, line_size, block, 1);
}
308
309
/**
 * VP3 inverse DCT that accumulates its result onto existing pixels.
 *
 * Forwards to idct() in mode 2, which adds the transformed block to the
 * bytes already present in the 8x8 area starting at @p dest.
 *
 * @param dest      top-left byte of the area to update; annotated
 *                  "align 8" by the original author
 * @param line_size distance in bytes between two rows of @p dest
 * @param block     64 coefficients; annotated "align 16"; overwritten by
 *                  the row pass of idct()
 */
void ff_vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size,
                       DCTELEM *block /* align 16 */)
{
    idct(dest, line_size, block, 2);
}