ffmpeg / libavcodec / simple_idct.c @ 5509bffa
History | View | Annotate | Download (15.9 KB)
/*
 * Simple IDCT
 *
 * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
20 |
|
21 |
/**
 * @file simple_idct.c
 * Simple IDCT, implemented in C.
 */
|
25 |
|
26 |
/*
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
 */
|
30 |
#include "avcodec.h" |
31 |
#include "dsputil.h" |
32 |
#include "simple_idct.h" |
33 |
|
34 |
#if 0
/* Disabled alternative table: Wi = 2048*sqrt(2)*cos(i*pi/16). */
#define W1 2841
#define W2 2676
#define W3 2408
#define W4 2048
#define W5 1609
#define W6 1108
#define W7 565
#define ROW_SHIFT 8
#define COL_SHIFT 17
#else
/*
 * Active table: Wi is nominally cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5.
 * NOTE(review): that formula yields 16384 for i = 4, yet W4 is 16383 here;
 * presumably deliberate -- confirm before "correcting" it.
 */
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6 8867
#define W7 4520
#define ROW_SHIFT 11
#define COL_SHIFT 20 // 6
#endif
|
55 |
|
56 |
/*
 * MUL16(rt, ra, rb): rt  = ra * rb   (signed 16x16 -> 32 multiply)
 * MAC16(rt, ra, rb): rt += ra * rb   (signed 16x16 -> 32 multiply add accumulate)
 *
 * Neither macro ends in a semicolon: the caller's own ';' terminates the
 * statement, so "if (c) MAC16(...); else ..." parses as intended.
 */
#if defined(ARCH_POWERPC_405)

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) \
    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) \
    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb))

#else

/* signed 16x16 -> 32 multiply add accumulate */
#define MAC16(rt, ra, rb) ((rt) += (ra) * (rb))

/* signed 16x16 -> 32 multiply */
#define MUL16(rt, ra, rb) ((rt) = (ra) * (rb))

#endif
|
75 |
|
76 |
/**
 * In-place 8-point IDCT of one row of coefficients.
 *
 * Fast path: when row[1..7] are all zero, every element of the row is set
 * to row[0] * 8 and the function returns early.
 * Otherwise the even part (a0..a3) and the odd part (b0..b3) are
 * accumulated and each output is (a +/- b) >> ROW_SHIFT.
 *
 * NOTE(review): the uint32_t / uint64_t accesses through row assume that
 * row is suitably aligned and that the build tolerates this aliasing --
 * confirm against the callers and compiler flags.
 *
 * @param row 8 consecutive coefficients, overwritten with the result
 */
static inline void idctRowCondDC (DCTELEM * row)
{
    int a0, a1, a2, a3, b0, b1, b2, b3;
#ifdef FAST_64BIT
    uint64_t temp;
#else
    uint32_t temp;
#endif

#ifdef FAST_64BIT
#ifdef WORDS_BIGENDIAN
#define ROW0_MASK 0xffff000000000000LL
#else
#define ROW0_MASK 0xffffLL
#endif
    if(sizeof(DCTELEM)==2){
        /* everything except the 16-bit lane holding row[0] is zero */
        if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
              ((uint64_t *)row)[1]) == 0) {
            /* multiply, not "<< 3": left-shifting a negative DC value is
               undefined behaviour */
            temp = (row[0] * 8) & 0xffff;
            /* replicate the 16-bit result into all four lanes of the word */
            temp += temp << 16;
            temp += temp << 32;
            ((uint64_t *)row)[0] = temp;
            ((uint64_t *)row)[1] = temp;
            return;
        }
    }else{
        if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
            row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] * 8;
            return;
        }
    }
#else
    if(sizeof(DCTELEM)==2){
        /* words 1..3 hold row[2..7]; row[1] shares word 0 with the DC value */
        if (!(((uint32_t*)row)[1] |
              ((uint32_t*)row)[2] |
              ((uint32_t*)row)[3] |
              row[1])) {
            /* multiply, not "<< 3": left-shifting a negative DC value is
               undefined behaviour */
            temp = (row[0] * 8) & 0xffff;
            /* replicate the 16-bit result into both lanes of the word */
            temp += temp << 16;
            ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
            ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
            return;
        }
    }else{
        if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
            row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] * 8;
            return;
        }
    }
#endif

    /* even part, seeded with the rounding constant for the final shift */
    a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
    a1 = a0;
    a2 = a0;
    a3 = a0;

    /* no need to optimize : gcc does it */
    a0 += W2 * row[2];
    a1 += W6 * row[2];
    a2 -= W6 * row[2];
    a3 -= W2 * row[2];

    /* odd part from row[1] and row[3] */
    MUL16(b0, W1, row[1]);
    MAC16(b0, W3, row[3]);
    MUL16(b1, W3, row[1]);
    MAC16(b1, -W7, row[3]);
    MUL16(b2, W5, row[1]);
    MAC16(b2, -W1, row[3]);
    MUL16(b3, W7, row[1]);
    MAC16(b3, -W5, row[3]);

    /* second half of the row (row[4..7] when DCTELEM is 16 bits wide):
       its terms are only added when at least one of them is non-zero */
#ifdef FAST_64BIT
    temp = ((uint64_t*)row)[1];
#else
    temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
#endif
    if (temp != 0) {
        a0 += W4*row[4] + W6*row[6];
        a1 += - W4*row[4] - W2*row[6];
        a2 += - W4*row[4] + W2*row[6];
        a3 += W4*row[4] - W6*row[6];

        MAC16(b0, W5, row[5]);
        MAC16(b0, W7, row[7]);

        MAC16(b1, -W1, row[5]);
        MAC16(b1, -W5, row[7]);

        MAC16(b2, W7, row[5]);
        MAC16(b2, W3, row[7]);

        MAC16(b3, W3, row[5]);
        MAC16(b3, -W1, row[7]);
    }

    /* final butterfly: outputs k and 7-k share the same a/b pair */
    row[0] = (a0 + b0) >> ROW_SHIFT;
    row[7] = (a0 - b0) >> ROW_SHIFT;
    row[1] = (a1 + b1) >> ROW_SHIFT;
    row[6] = (a1 - b1) >> ROW_SHIFT;
    row[2] = (a2 + b2) >> ROW_SHIFT;
    row[5] = (a2 - b2) >> ROW_SHIFT;
    row[3] = (a3 + b3) >> ROW_SHIFT;
    row[4] = (a3 - b3) >> ROW_SHIFT;
}
180 |
|
181 |
static inline void idctSparseColPut (uint8_t *dest, int line_size, |
182 |
DCTELEM * col) |
183 |
{ |
184 |
int a0, a1, a2, a3, b0, b1, b2, b3;
|
185 |
uint8_t *cm = cropTbl + MAX_NEG_CROP; |
186 |
|
187 |
/* XXX: I did that only to give same values as previous code */
|
188 |
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
189 |
a1 = a0; |
190 |
a2 = a0; |
191 |
a3 = a0; |
192 |
|
193 |
a0 += + W2*col[8*2]; |
194 |
a1 += + W6*col[8*2]; |
195 |
a2 += - W6*col[8*2]; |
196 |
a3 += - W2*col[8*2]; |
197 |
|
198 |
MUL16(b0, W1, col[8*1]); |
199 |
MUL16(b1, W3, col[8*1]); |
200 |
MUL16(b2, W5, col[8*1]); |
201 |
MUL16(b3, W7, col[8*1]); |
202 |
|
203 |
MAC16(b0, + W3, col[8*3]); |
204 |
MAC16(b1, - W7, col[8*3]); |
205 |
MAC16(b2, - W1, col[8*3]); |
206 |
MAC16(b3, - W5, col[8*3]); |
207 |
|
208 |
if(col[8*4]){ |
209 |
a0 += + W4*col[8*4]; |
210 |
a1 += - W4*col[8*4]; |
211 |
a2 += - W4*col[8*4]; |
212 |
a3 += + W4*col[8*4]; |
213 |
} |
214 |
|
215 |
if (col[8*5]) { |
216 |
MAC16(b0, + W5, col[8*5]); |
217 |
MAC16(b1, - W1, col[8*5]); |
218 |
MAC16(b2, + W7, col[8*5]); |
219 |
MAC16(b3, + W3, col[8*5]); |
220 |
} |
221 |
|
222 |
if(col[8*6]){ |
223 |
a0 += + W6*col[8*6]; |
224 |
a1 += - W2*col[8*6]; |
225 |
a2 += + W2*col[8*6]; |
226 |
a3 += - W6*col[8*6]; |
227 |
} |
228 |
|
229 |
if (col[8*7]) { |
230 |
MAC16(b0, + W7, col[8*7]); |
231 |
MAC16(b1, - W5, col[8*7]); |
232 |
MAC16(b2, + W3, col[8*7]); |
233 |
MAC16(b3, - W1, col[8*7]); |
234 |
} |
235 |
|
236 |
dest[0] = cm[(a0 + b0) >> COL_SHIFT];
|
237 |
dest += line_size; |
238 |
dest[0] = cm[(a1 + b1) >> COL_SHIFT];
|
239 |
dest += line_size; |
240 |
dest[0] = cm[(a2 + b2) >> COL_SHIFT];
|
241 |
dest += line_size; |
242 |
dest[0] = cm[(a3 + b3) >> COL_SHIFT];
|
243 |
dest += line_size; |
244 |
dest[0] = cm[(a3 - b3) >> COL_SHIFT];
|
245 |
dest += line_size; |
246 |
dest[0] = cm[(a2 - b2) >> COL_SHIFT];
|
247 |
dest += line_size; |
248 |
dest[0] = cm[(a1 - b1) >> COL_SHIFT];
|
249 |
dest += line_size; |
250 |
dest[0] = cm[(a0 - b0) >> COL_SHIFT];
|
251 |
} |
252 |
|
253 |
static inline void idctSparseColAdd (uint8_t *dest, int line_size, |
254 |
DCTELEM * col) |
255 |
{ |
256 |
int a0, a1, a2, a3, b0, b1, b2, b3;
|
257 |
uint8_t *cm = cropTbl + MAX_NEG_CROP; |
258 |
|
259 |
/* XXX: I did that only to give same values as previous code */
|
260 |
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
261 |
a1 = a0; |
262 |
a2 = a0; |
263 |
a3 = a0; |
264 |
|
265 |
a0 += + W2*col[8*2]; |
266 |
a1 += + W6*col[8*2]; |
267 |
a2 += - W6*col[8*2]; |
268 |
a3 += - W2*col[8*2]; |
269 |
|
270 |
MUL16(b0, W1, col[8*1]); |
271 |
MUL16(b1, W3, col[8*1]); |
272 |
MUL16(b2, W5, col[8*1]); |
273 |
MUL16(b3, W7, col[8*1]); |
274 |
|
275 |
MAC16(b0, + W3, col[8*3]); |
276 |
MAC16(b1, - W7, col[8*3]); |
277 |
MAC16(b2, - W1, col[8*3]); |
278 |
MAC16(b3, - W5, col[8*3]); |
279 |
|
280 |
if(col[8*4]){ |
281 |
a0 += + W4*col[8*4]; |
282 |
a1 += - W4*col[8*4]; |
283 |
a2 += - W4*col[8*4]; |
284 |
a3 += + W4*col[8*4]; |
285 |
} |
286 |
|
287 |
if (col[8*5]) { |
288 |
MAC16(b0, + W5, col[8*5]); |
289 |
MAC16(b1, - W1, col[8*5]); |
290 |
MAC16(b2, + W7, col[8*5]); |
291 |
MAC16(b3, + W3, col[8*5]); |
292 |
} |
293 |
|
294 |
if(col[8*6]){ |
295 |
a0 += + W6*col[8*6]; |
296 |
a1 += - W2*col[8*6]; |
297 |
a2 += + W2*col[8*6]; |
298 |
a3 += - W6*col[8*6]; |
299 |
} |
300 |
|
301 |
if (col[8*7]) { |
302 |
MAC16(b0, + W7, col[8*7]); |
303 |
MAC16(b1, - W5, col[8*7]); |
304 |
MAC16(b2, + W3, col[8*7]); |
305 |
MAC16(b3, - W1, col[8*7]); |
306 |
} |
307 |
|
308 |
dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)]; |
309 |
dest += line_size; |
310 |
dest[0] = cm[dest[0] + ((a1 + b1) >> COL_SHIFT)]; |
311 |
dest += line_size; |
312 |
dest[0] = cm[dest[0] + ((a2 + b2) >> COL_SHIFT)]; |
313 |
dest += line_size; |
314 |
dest[0] = cm[dest[0] + ((a3 + b3) >> COL_SHIFT)]; |
315 |
dest += line_size; |
316 |
dest[0] = cm[dest[0] + ((a3 - b3) >> COL_SHIFT)]; |
317 |
dest += line_size; |
318 |
dest[0] = cm[dest[0] + ((a2 - b2) >> COL_SHIFT)]; |
319 |
dest += line_size; |
320 |
dest[0] = cm[dest[0] + ((a1 - b1) >> COL_SHIFT)]; |
321 |
dest += line_size; |
322 |
dest[0] = cm[dest[0] + ((a0 - b0) >> COL_SHIFT)]; |
323 |
} |
324 |
|
325 |
static inline void idctSparseCol (DCTELEM * col) |
326 |
{ |
327 |
int a0, a1, a2, a3, b0, b1, b2, b3;
|
328 |
|
329 |
/* XXX: I did that only to give same values as previous code */
|
330 |
a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4)); |
331 |
a1 = a0; |
332 |
a2 = a0; |
333 |
a3 = a0; |
334 |
|
335 |
a0 += + W2*col[8*2]; |
336 |
a1 += + W6*col[8*2]; |
337 |
a2 += - W6*col[8*2]; |
338 |
a3 += - W2*col[8*2]; |
339 |
|
340 |
MUL16(b0, W1, col[8*1]); |
341 |
MUL16(b1, W3, col[8*1]); |
342 |
MUL16(b2, W5, col[8*1]); |
343 |
MUL16(b3, W7, col[8*1]); |
344 |
|
345 |
MAC16(b0, + W3, col[8*3]); |
346 |
MAC16(b1, - W7, col[8*3]); |
347 |
MAC16(b2, - W1, col[8*3]); |
348 |
MAC16(b3, - W5, col[8*3]); |
349 |
|
350 |
if(col[8*4]){ |
351 |
a0 += + W4*col[8*4]; |
352 |
a1 += - W4*col[8*4]; |
353 |
a2 += - W4*col[8*4]; |
354 |
a3 += + W4*col[8*4]; |
355 |
} |
356 |
|
357 |
if (col[8*5]) { |
358 |
MAC16(b0, + W5, col[8*5]); |
359 |
MAC16(b1, - W1, col[8*5]); |
360 |
MAC16(b2, + W7, col[8*5]); |
361 |
MAC16(b3, + W3, col[8*5]); |
362 |
} |
363 |
|
364 |
if(col[8*6]){ |
365 |
a0 += + W6*col[8*6]; |
366 |
a1 += - W2*col[8*6]; |
367 |
a2 += + W2*col[8*6]; |
368 |
a3 += - W6*col[8*6]; |
369 |
} |
370 |
|
371 |
if (col[8*7]) { |
372 |
MAC16(b0, + W7, col[8*7]); |
373 |
MAC16(b1, - W5, col[8*7]); |
374 |
MAC16(b2, + W3, col[8*7]); |
375 |
MAC16(b3, - W1, col[8*7]); |
376 |
} |
377 |
|
378 |
col[0 ] = ((a0 + b0) >> COL_SHIFT);
|
379 |
col[8 ] = ((a1 + b1) >> COL_SHIFT);
|
380 |
col[16] = ((a2 + b2) >> COL_SHIFT);
|
381 |
col[24] = ((a3 + b3) >> COL_SHIFT);
|
382 |
col[32] = ((a3 - b3) >> COL_SHIFT);
|
383 |
col[40] = ((a2 - b2) >> COL_SHIFT);
|
384 |
col[48] = ((a1 - b1) >> COL_SHIFT);
|
385 |
col[56] = ((a0 - b0) >> COL_SHIFT);
|
386 |
} |
387 |
|
388 |
/**
 * 8x8 IDCT of block; the result is stored to dest.
 * block is modified: the row pass runs in place.
 */
void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    int n;

    /* pass 1: every row, in place */
    for (n = 0; n < 8; n++) {
        idctRowCondDC(&block[8 * n]);
    }

    /* pass 2: every column, written to the matching dest column */
    for (n = 0; n < 8; n++) {
        idctSparseColPut(&dest[n], line_size, &block[n]);
    }
}
397 |
|
398 |
/**
 * 8x8 IDCT of block; the result is added to the pixels in dest.
 * block is modified: the row pass runs in place.
 */
void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    int n;

    /* pass 1: every row, in place */
    for (n = 0; n < 8; n++) {
        idctRowCondDC(&block[8 * n]);
    }

    /* pass 2: every column, accumulated into the matching dest column */
    for (n = 0; n < 8; n++) {
        idctSparseColAdd(&dest[n], line_size, &block[n]);
    }
}
407 |
|
408 |
/**
 * 8x8 IDCT done entirely in place: block holds the result on return.
 */
void simple_idct(DCTELEM *block)
{
    int n;

    /* pass 1: every row */
    for (n = 0; n < 8; n++) {
        idctRowCondDC(&block[8 * n]);
    }

    /* pass 2: every column */
    for (n = 0; n < 8; n++) {
        idctSparseCol(&block[n]);
    }
}
417 |
|
418 |
/* 2x4x8 idct */
|
419 |
|
420 |
#define CN_SHIFT 12 |
421 |
#define C_FIX(x) ((int)((x) * (1 << CN_SHIFT) + 0.5)) |
422 |
#define C1 C_FIX(0.6532814824) |
423 |
#define C2 C_FIX(0.2705980501) |
424 |
|
425 |
/* row idct is multiple by 16 * sqrt(2.0), col idct4 is normalized,
|
426 |
and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
|
427 |
#define C_SHIFT (4+1+12) |
428 |
|
429 |
static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col) |
430 |
{ |
431 |
int c0, c1, c2, c3, a0, a1, a2, a3;
|
432 |
const uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
433 |
|
434 |
a0 = col[8*0]; |
435 |
a1 = col[8*2]; |
436 |
a2 = col[8*4]; |
437 |
a3 = col[8*6]; |
438 |
c0 = ((a0 + a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
439 |
c2 = ((a0 - a2) << (CN_SHIFT - 1)) + (1 << (C_SHIFT - 1)); |
440 |
c1 = a1 * C1 + a3 * C2; |
441 |
c3 = a1 * C2 - a3 * C1; |
442 |
dest[0] = cm[(c0 + c1) >> C_SHIFT];
|
443 |
dest += line_size; |
444 |
dest[0] = cm[(c2 + c3) >> C_SHIFT];
|
445 |
dest += line_size; |
446 |
dest[0] = cm[(c2 - c3) >> C_SHIFT];
|
447 |
dest += line_size; |
448 |
dest[0] = cm[(c0 - c1) >> C_SHIFT];
|
449 |
} |
450 |
|
451 |
#define BF(k) \
|
452 |
{\ |
453 |
int a0, a1;\
|
454 |
a0 = ptr[k];\ |
455 |
a1 = ptr[8 + k];\
|
456 |
ptr[k] = a0 + a1;\ |
457 |
ptr[8 + k] = a0 - a1;\
|
458 |
} |
459 |
|
460 |
/* only used by DV codec. The input must be interlaced. 128 is added
|
461 |
to the pixels before clamping to avoid systematic error
|
462 |
(1024*sqrt(2)) offset would be needed otherwise. */
|
463 |
/* XXX: I think a 1.0/sqrt(2) normalization should be needed to
|
464 |
compensate the extra butterfly stage - I don't have the full DV
|
465 |
specification */
|
466 |
void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block) |
467 |
{ |
468 |
int i;
|
469 |
DCTELEM *ptr; |
470 |
|
471 |
/* butterfly */
|
472 |
ptr = block; |
473 |
for(i=0;i<4;i++) { |
474 |
BF(0);
|
475 |
BF(1);
|
476 |
BF(2);
|
477 |
BF(3);
|
478 |
BF(4);
|
479 |
BF(5);
|
480 |
BF(6);
|
481 |
BF(7);
|
482 |
ptr += 2 * 8; |
483 |
} |
484 |
|
485 |
/* IDCT8 on each line */
|
486 |
for(i=0; i<8; i++) { |
487 |
idctRowCondDC(block + i*8);
|
488 |
} |
489 |
|
490 |
/* IDCT4 and store */
|
491 |
for(i=0;i<8;i++) { |
492 |
idct4col(dest + i, 2 * line_size, block + i);
|
493 |
idct4col(dest + line_size + i, 2 * line_size, block + 8 + i); |
494 |
} |
495 |
} |
496 |
|
497 |
/* 8x4 & 4x8 WMV2 IDCT */
|
498 |
#undef CN_SHIFT
|
499 |
#undef C_SHIFT
|
500 |
#undef C_FIX
|
501 |
#undef C1
|
502 |
#undef C2
|
503 |
#define CN_SHIFT 12 |
504 |
#define C_FIX(x) ((int)((x) * 1.414213562 * (1 << CN_SHIFT) + 0.5)) |
505 |
#define C1 C_FIX(0.6532814824) |
506 |
#define C2 C_FIX(0.2705980501) |
507 |
#define C3 C_FIX(0.5) |
508 |
#define C_SHIFT (4+1+12) |
509 |
static inline void idct4col_add(uint8_t *dest, int line_size, const DCTELEM *col) |
510 |
{ |
511 |
int c0, c1, c2, c3, a0, a1, a2, a3;
|
512 |
const uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
513 |
|
514 |
a0 = col[8*0]; |
515 |
a1 = col[8*1]; |
516 |
a2 = col[8*2]; |
517 |
a3 = col[8*3]; |
518 |
c0 = (a0 + a2)*C3 + (1 << (C_SHIFT - 1)); |
519 |
c2 = (a0 - a2)*C3 + (1 << (C_SHIFT - 1)); |
520 |
c1 = a1 * C1 + a3 * C2; |
521 |
c3 = a1 * C2 - a3 * C1; |
522 |
dest[0] = cm[dest[0] + ((c0 + c1) >> C_SHIFT)]; |
523 |
dest += line_size; |
524 |
dest[0] = cm[dest[0] + ((c2 + c3) >> C_SHIFT)]; |
525 |
dest += line_size; |
526 |
dest[0] = cm[dest[0] + ((c2 - c3) >> C_SHIFT)]; |
527 |
dest += line_size; |
528 |
dest[0] = cm[dest[0] + ((c0 - c1) >> C_SHIFT)]; |
529 |
} |
530 |
|
531 |
#define RN_SHIFT 15 |
532 |
#define R_FIX(x) ((int)((x) * 1.414213562 * (1 << RN_SHIFT) + 0.5)) |
533 |
#define R1 R_FIX(0.6532814824) |
534 |
#define R2 R_FIX(0.2705980501) |
535 |
#define R3 R_FIX(0.5) |
536 |
#define R_SHIFT 11 |
537 |
static inline void idct4row(DCTELEM *row) |
538 |
{ |
539 |
int c0, c1, c2, c3, a0, a1, a2, a3;
|
540 |
//const uint8_t *cm = cropTbl + MAX_NEG_CROP;
|
541 |
|
542 |
a0 = row[0];
|
543 |
a1 = row[1];
|
544 |
a2 = row[2];
|
545 |
a3 = row[3];
|
546 |
c0 = (a0 + a2)*R3 + (1 << (R_SHIFT - 1)); |
547 |
c2 = (a0 - a2)*R3 + (1 << (R_SHIFT - 1)); |
548 |
c1 = a1 * R1 + a3 * R2; |
549 |
c3 = a1 * R2 - a3 * R1; |
550 |
row[0]= (c0 + c1) >> R_SHIFT;
|
551 |
row[1]= (c2 + c3) >> R_SHIFT;
|
552 |
row[2]= (c2 - c3) >> R_SHIFT;
|
553 |
row[3]= (c0 - c1) >> R_SHIFT;
|
554 |
} |
555 |
|
556 |
/**
 * 8-wide by 4-high IDCT of block; the result is added to dest.
 * Only the first 4 rows of block are transformed, in place.
 */
void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    int n;

    /* IDCT8 on each of the 4 lines */
    for (n = 0; n < 4; n++)
        idctRowCondDC(&block[8 * n]);

    /* IDCT4 down each of the 8 columns, accumulated into dest */
    for (n = 0; n < 8; n++)
        idct4col_add(&dest[n], line_size, &block[n]);
}
570 |
|
571 |
/**
 * 4-wide by 8-high IDCT of block; the result is added to dest.
 * The first 4 elements of every row of block are transformed in place.
 */
void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    int n;

    /* IDCT4 on each of the 8 lines */
    for (n = 0; n < 8; n++)
        idct4row(&block[8 * n]);

    /* IDCT8 down each of the 4 columns, accumulated into dest */
    for (n = 0; n < 4; n++)
        idctSparseColAdd(&dest[n], line_size, &block[n]);
}
585 |
|