ffmpeg / libavcodec / vc1dsp.c @ f66e4f5f
History | View | Annotate | Download (13.3 KB)
1 |
/*
|
---|---|
2 |
* VC-1 and WMV3 decoder - DSP functions
|
3 |
* Copyright (c) 2006 Konstantin Shishkov
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/**
|
24 |
* @file vc1dsp.c
|
25 |
* VC-1 and WMV3 decoder
|
26 |
*
|
27 |
*/
|
28 |
|
29 |
#include "dsputil.h" |
30 |
|
31 |
|
32 |
/** Apply overlap transform to horizontal edge
 *
 * Filters 8 neighbouring columns in place. For every column the four
 * pixels at src - 2*stride, src - stride, src and src + stride (two on
 * each side of the edge between src - stride and src) are read and
 * rewritten. The rounding term alternates between 1 and 0 from one
 * column to the next, starting with 1.
 *
 * @param src    pointer to the first pixel of the row directly after the edge
 * @param stride offset between vertically adjacent pixels
 */
static void vc1_v_overlap_c(uint8_t* src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int inner;
    int rnd = 1;

    for(i = 0; i < 8; i++) {
        a = src[-2*stride];
        b = src[-stride];
        c = src[0];
        d = src[stride];
        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        /* The outer pair only moves towards each other, so it stays within
         * 0..255 for 8-bit input. The inner pair can overshoot (for example
         * a = 255, b = c = d = 0 gives b - d2 = -32), so it is saturated
         * instead of being allowed to wrap around in the uint8_t store. */
        src[-2*stride] = a - d1;
        inner = b - d2;
        src[-stride] = inner < 0 ? 0 : inner > 255 ? 255 : inner;
        inner = c + d2;
        src[0] = inner < 0 ? 0 : inner > 255 ? 255 : inner;
        src[stride] = d + d1;

        src++;
        rnd = !rnd;
    }
}
56 |
|
57 |
/** Apply overlap transform to vertical edge
 *
 * Filters 8 consecutive rows in place. For every row the four pixels
 * src[-2], src[-1], src[0] and src[1] (two on each side of the edge
 * between src[-1] and src[0]) are read and rewritten. The rounding term
 * alternates between 1 and 0 from one row to the next, starting with 1.
 *
 * @param src    pointer to the pixel directly after the edge in the first row
 * @param stride offset between vertically adjacent pixels
 */
static void vc1_h_overlap_c(uint8_t* src, int stride)
{
    int i;
    int a, b, c, d;
    int d1, d2;
    int inner;
    int rnd = 1;

    for(i = 0; i < 8; i++) {
        a = src[-2];
        b = src[-1];
        c = src[0];
        d = src[1];
        d1 = (a - d + 3 + rnd) >> 3;
        d2 = (a - d + b - c + 4 - rnd) >> 3;

        /* The outer pair only moves towards each other, so it stays within
         * 0..255 for 8-bit input. The inner pair can overshoot (for example
         * a = 255, b = c = d = 0 gives b - d2 = -32), so it is saturated
         * instead of being allowed to wrap around in the uint8_t store. */
        src[-2] = a - d1;
        inner = b - d2;
        src[-1] = inner < 0 ? 0 : inner > 255 ? 255 : inner;
        inner = c + d2;
        src[0] = inner < 0 ? 0 : inner > 255 ? 255 : inner;
        src[1] = d + d1;

        src += stride;
        rnd = !rnd;
    }
}
81 |
|
82 |
|
83 |
/** Do inverse transform on 8x8 block
|
84 |
*/
|
85 |
static void vc1_inv_trans_8x8_c(DCTELEM block[64]) |
86 |
{ |
87 |
int i;
|
88 |
register int t1,t2,t3,t4,t5,t6,t7,t8; |
89 |
DCTELEM *src, *dst; |
90 |
|
91 |
src = block; |
92 |
dst = block; |
93 |
for(i = 0; i < 8; i++){ |
94 |
t1 = 12 * (src[0] + src[4]); |
95 |
t2 = 12 * (src[0] - src[4]); |
96 |
t3 = 16 * src[2] + 6 * src[6]; |
97 |
t4 = 6 * src[2] - 16 * src[6]; |
98 |
|
99 |
t5 = t1 + t3; |
100 |
t6 = t2 + t4; |
101 |
t7 = t2 - t4; |
102 |
t8 = t1 - t3; |
103 |
|
104 |
t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; |
105 |
t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; |
106 |
t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; |
107 |
t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; |
108 |
|
109 |
dst[0] = (t5 + t1 + 4) >> 3; |
110 |
dst[1] = (t6 + t2 + 4) >> 3; |
111 |
dst[2] = (t7 + t3 + 4) >> 3; |
112 |
dst[3] = (t8 + t4 + 4) >> 3; |
113 |
dst[4] = (t8 - t4 + 4) >> 3; |
114 |
dst[5] = (t7 - t3 + 4) >> 3; |
115 |
dst[6] = (t6 - t2 + 4) >> 3; |
116 |
dst[7] = (t5 - t1 + 4) >> 3; |
117 |
|
118 |
src += 8;
|
119 |
dst += 8;
|
120 |
} |
121 |
|
122 |
src = block; |
123 |
dst = block; |
124 |
for(i = 0; i < 8; i++){ |
125 |
t1 = 12 * (src[ 0] + src[32]); |
126 |
t2 = 12 * (src[ 0] - src[32]); |
127 |
t3 = 16 * src[16] + 6 * src[48]; |
128 |
t4 = 6 * src[16] - 16 * src[48]; |
129 |
|
130 |
t5 = t1 + t3; |
131 |
t6 = t2 + t4; |
132 |
t7 = t2 - t4; |
133 |
t8 = t1 - t3; |
134 |
|
135 |
t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; |
136 |
t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; |
137 |
t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; |
138 |
t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; |
139 |
|
140 |
dst[ 0] = (t5 + t1 + 64) >> 7; |
141 |
dst[ 8] = (t6 + t2 + 64) >> 7; |
142 |
dst[16] = (t7 + t3 + 64) >> 7; |
143 |
dst[24] = (t8 + t4 + 64) >> 7; |
144 |
dst[32] = (t8 - t4 + 64 + 1) >> 7; |
145 |
dst[40] = (t7 - t3 + 64 + 1) >> 7; |
146 |
dst[48] = (t6 - t2 + 64 + 1) >> 7; |
147 |
dst[56] = (t5 - t1 + 64 + 1) >> 7; |
148 |
|
149 |
src++; |
150 |
dst++; |
151 |
} |
152 |
} |
153 |
|
154 |
/** Do inverse transform on 8x4 part of block
|
155 |
*/
|
156 |
static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n) |
157 |
{ |
158 |
int i;
|
159 |
register int t1,t2,t3,t4,t5,t6,t7,t8; |
160 |
DCTELEM *src, *dst; |
161 |
int off;
|
162 |
|
163 |
off = n * 32;
|
164 |
src = block + off; |
165 |
dst = block + off; |
166 |
for(i = 0; i < 4; i++){ |
167 |
t1 = 12 * (src[0] + src[4]); |
168 |
t2 = 12 * (src[0] - src[4]); |
169 |
t3 = 16 * src[2] + 6 * src[6]; |
170 |
t4 = 6 * src[2] - 16 * src[6]; |
171 |
|
172 |
t5 = t1 + t3; |
173 |
t6 = t2 + t4; |
174 |
t7 = t2 - t4; |
175 |
t8 = t1 - t3; |
176 |
|
177 |
t1 = 16 * src[1] + 15 * src[3] + 9 * src[5] + 4 * src[7]; |
178 |
t2 = 15 * src[1] - 4 * src[3] - 16 * src[5] - 9 * src[7]; |
179 |
t3 = 9 * src[1] - 16 * src[3] + 4 * src[5] + 15 * src[7]; |
180 |
t4 = 4 * src[1] - 9 * src[3] + 15 * src[5] - 16 * src[7]; |
181 |
|
182 |
dst[0] = (t5 + t1 + 4) >> 3; |
183 |
dst[1] = (t6 + t2 + 4) >> 3; |
184 |
dst[2] = (t7 + t3 + 4) >> 3; |
185 |
dst[3] = (t8 + t4 + 4) >> 3; |
186 |
dst[4] = (t8 - t4 + 4) >> 3; |
187 |
dst[5] = (t7 - t3 + 4) >> 3; |
188 |
dst[6] = (t6 - t2 + 4) >> 3; |
189 |
dst[7] = (t5 - t1 + 4) >> 3; |
190 |
|
191 |
src += 8;
|
192 |
dst += 8;
|
193 |
} |
194 |
|
195 |
src = block + off; |
196 |
dst = block + off; |
197 |
for(i = 0; i < 8; i++){ |
198 |
t1 = 17 * (src[ 0] + src[16]); |
199 |
t2 = 17 * (src[ 0] - src[16]); |
200 |
t3 = 22 * src[ 8]; |
201 |
t4 = 22 * src[24]; |
202 |
t5 = 10 * src[ 8]; |
203 |
t6 = 10 * src[24]; |
204 |
|
205 |
dst[ 0] = (t1 + t3 + t6 + 64) >> 7; |
206 |
dst[ 8] = (t2 - t4 + t5 + 64) >> 7; |
207 |
dst[16] = (t2 + t4 - t5 + 64) >> 7; |
208 |
dst[24] = (t1 - t3 - t6 + 64) >> 7; |
209 |
|
210 |
src ++; |
211 |
dst ++; |
212 |
} |
213 |
} |
214 |
|
215 |
/** Do inverse transform on 4x8 parts of block
|
216 |
*/
|
217 |
static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n) |
218 |
{ |
219 |
int i;
|
220 |
register int t1,t2,t3,t4,t5,t6,t7,t8; |
221 |
DCTELEM *src, *dst; |
222 |
int off;
|
223 |
|
224 |
off = n * 4;
|
225 |
src = block + off; |
226 |
dst = block + off; |
227 |
for(i = 0; i < 8; i++){ |
228 |
t1 = 17 * (src[0] + src[2]); |
229 |
t2 = 17 * (src[0] - src[2]); |
230 |
t3 = 22 * src[1]; |
231 |
t4 = 22 * src[3]; |
232 |
t5 = 10 * src[1]; |
233 |
t6 = 10 * src[3]; |
234 |
|
235 |
dst[0] = (t1 + t3 + t6 + 4) >> 3; |
236 |
dst[1] = (t2 - t4 + t5 + 4) >> 3; |
237 |
dst[2] = (t2 + t4 - t5 + 4) >> 3; |
238 |
dst[3] = (t1 - t3 - t6 + 4) >> 3; |
239 |
|
240 |
src += 8;
|
241 |
dst += 8;
|
242 |
} |
243 |
|
244 |
src = block + off; |
245 |
dst = block + off; |
246 |
for(i = 0; i < 4; i++){ |
247 |
t1 = 12 * (src[ 0] + src[32]); |
248 |
t2 = 12 * (src[ 0] - src[32]); |
249 |
t3 = 16 * src[16] + 6 * src[48]; |
250 |
t4 = 6 * src[16] - 16 * src[48]; |
251 |
|
252 |
t5 = t1 + t3; |
253 |
t6 = t2 + t4; |
254 |
t7 = t2 - t4; |
255 |
t8 = t1 - t3; |
256 |
|
257 |
t1 = 16 * src[ 8] + 15 * src[24] + 9 * src[40] + 4 * src[56]; |
258 |
t2 = 15 * src[ 8] - 4 * src[24] - 16 * src[40] - 9 * src[56]; |
259 |
t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; |
260 |
t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; |
261 |
|
262 |
dst[ 0] = (t5 + t1 + 64) >> 7; |
263 |
dst[ 8] = (t6 + t2 + 64) >> 7; |
264 |
dst[16] = (t7 + t3 + 64) >> 7; |
265 |
dst[24] = (t8 + t4 + 64) >> 7; |
266 |
dst[32] = (t8 - t4 + 64 + 1) >> 7; |
267 |
dst[40] = (t7 - t3 + 64 + 1) >> 7; |
268 |
dst[48] = (t6 - t2 + 64 + 1) >> 7; |
269 |
dst[56] = (t5 - t1 + 64 + 1) >> 7; |
270 |
|
271 |
src++; |
272 |
dst++; |
273 |
} |
274 |
} |
275 |
|
276 |
/** Do inverse transform on 4x4 part of block
|
277 |
*/
|
278 |
static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) |
279 |
{ |
280 |
int i;
|
281 |
register int t1,t2,t3,t4,t5,t6; |
282 |
DCTELEM *src, *dst; |
283 |
int off;
|
284 |
|
285 |
off = (n&1) * 4 + (n&2) * 16; |
286 |
src = block + off; |
287 |
dst = block + off; |
288 |
for(i = 0; i < 4; i++){ |
289 |
t1 = 17 * (src[0] + src[2]); |
290 |
t2 = 17 * (src[0] - src[2]); |
291 |
t3 = 22 * src[1]; |
292 |
t4 = 22 * src[3]; |
293 |
t5 = 10 * src[1]; |
294 |
t6 = 10 * src[3]; |
295 |
|
296 |
dst[0] = (t1 + t3 + t6 + 4) >> 3; |
297 |
dst[1] = (t2 - t4 + t5 + 4) >> 3; |
298 |
dst[2] = (t2 + t4 - t5 + 4) >> 3; |
299 |
dst[3] = (t1 - t3 - t6 + 4) >> 3; |
300 |
|
301 |
src += 8;
|
302 |
dst += 8;
|
303 |
} |
304 |
|
305 |
src = block + off; |
306 |
dst = block + off; |
307 |
for(i = 0; i < 4; i++){ |
308 |
t1 = 17 * (src[ 0] + src[16]); |
309 |
t2 = 17 * (src[ 0] - src[16]); |
310 |
t3 = 22 * src[ 8]; |
311 |
t4 = 22 * src[24]; |
312 |
t5 = 10 * src[ 8]; |
313 |
t6 = 10 * src[24]; |
314 |
|
315 |
dst[ 0] = (t1 + t3 + t6 + 64) >> 7; |
316 |
dst[ 8] = (t2 - t4 + t5 + 64) >> 7; |
317 |
dst[16] = (t2 + t4 - t5 + 64) >> 7; |
318 |
dst[24] = (t1 - t3 - t6 + 64) >> 7; |
319 |
|
320 |
src ++; |
321 |
dst ++; |
322 |
} |
323 |
} |
324 |
|
325 |
/* motion compensation functions */
|
326 |
|
327 |
/** Filter used to interpolate fractional pel values
|
328 |
*/
|
329 |
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r) |
330 |
{ |
331 |
switch(mode){
|
332 |
case 0: //no shift |
333 |
return src[0]; |
334 |
case 1: // 1/4 shift |
335 |
return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6; |
336 |
case 2: // 1/2 shift |
337 |
return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4; |
338 |
case 3: // 3/4 shift |
339 |
return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6; |
340 |
} |
341 |
return 0; //should not occur |
342 |
} |
343 |
|
344 |
/** Function used to do motion compensation with bicubic interpolation
 *
 * Produces an 8x8 block in two passes. The first pass filters
 * horizontally (tap distance 1) using the two lowest bits of mode and
 * writes 11 clipped rows, starting one row above src, into a temporary
 * buffer. The second pass filters that buffer vertically (tap distance 8)
 * using bits 2-3 of mode and writes 8 clipped rows to dst.
 *
 * @param dst    destination of the 8x8 block
 * @param src    source position corresponding to the block's first pixel
 * @param stride row distance used for both src and dst
 * @param mode   bits 0-1: first-pass filter mode, bits 2-3: second-pass filter mode
 * @param rnd    rounding control; the first pass uses rnd, the second 1 - rnd
 */
static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int mode, int rnd)
{
    uint8_t tmp[8*11];
    uint8_t *trow;
    const uint8_t *srow;
    const int hmode = mode & 3;
    const int vmode = (mode >> 2) & 3;
    int x, y;

    /* Pass 1: one extra row above and two below feed the vertical taps. */
    srow = src - stride;
    for (y = 0, trow = tmp; y < 11; y++, trow += 8, srow += stride) {
        for (x = 0; x < 8; x++)
            trow[x] = av_clip_uint8(vc1_mspel_filter(srow + x, 1, hmode, rnd));
    }

    /* Pass 2: start at the second temporary row so the tap at -8 is valid. */
    for (y = 0, trow = tmp + 8; y < 8; y++, trow += 8, dst += stride) {
        for (x = 0; x < 8; x++)
            dst[x] = av_clip_uint8(vc1_mspel_filter(trow + x, 8, vmode, 1 - rnd));
    }
}
373 |
|
374 |
/* pixel functions - really are entry points to vc1_mspel_mc */

/* this one is defined in dsputil.c */
void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);

/**
 * Define the entry point ff_put_vc1_mspel_mc<x><y>_c, which forwards to
 * vc1_mspel_mc() with mode = x | (y << 2): x lands in the two lowest mode
 * bits, y in the two bits above them.
 */
#define PUT_VC1_MSPEL(x, y)                                                       \
static void ff_put_vc1_mspel_mc ## x ## y ## _c(uint8_t *dst, const uint8_t *src, \
                                                int stride, int rnd)              \
{                                                                                 \
    vc1_mspel_mc(dst, src, stride, (x) | ((y) << 2), rnd);                        \
}

PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)

#undef PUT_VC1_MSPEL
438 |
|
439 |
/**
 * Install the C implementations of the VC-1 DSP routines into a DSPContext:
 * the four inverse transforms, the two overlap filters and the sixteen
 * entries of put_vc1_mspel_pixels_tab.
 *
 * @param dsp   context whose VC-1 function pointers are set
 * @param avctx not used by this function
 */
void ff_vc1dsp_init(DSPContext* dsp, AVCodecContext *avctx) {
    /* Entry k is the ff_put_vc1_mspel_mcXY_c function with X = k & 3, Y = k >> 2. */
    static void (* const mspel_funcs[16])(uint8_t *dst, const uint8_t *src, int stride, int rnd) = {
        ff_put_vc1_mspel_mc00_c, ff_put_vc1_mspel_mc10_c, ff_put_vc1_mspel_mc20_c, ff_put_vc1_mspel_mc30_c,
        ff_put_vc1_mspel_mc01_c, ff_put_vc1_mspel_mc11_c, ff_put_vc1_mspel_mc21_c, ff_put_vc1_mspel_mc31_c,
        ff_put_vc1_mspel_mc02_c, ff_put_vc1_mspel_mc12_c, ff_put_vc1_mspel_mc22_c, ff_put_vc1_mspel_mc32_c,
        ff_put_vc1_mspel_mc03_c, ff_put_vc1_mspel_mc13_c, ff_put_vc1_mspel_mc23_c, ff_put_vc1_mspel_mc33_c,
    };
    int k;

    /* Overlap filters. */
    dsp->vc1_v_overlap = vc1_v_overlap_c;
    dsp->vc1_h_overlap = vc1_h_overlap_c;

    /* Inverse transforms. */
    dsp->vc1_inv_trans_4x4 = vc1_inv_trans_4x4_c;
    dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_c;
    dsp->vc1_inv_trans_4x8 = vc1_inv_trans_4x8_c;
    dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_c;

    /* Motion compensation entry points. */
    for (k = 0; k < 16; k++)
        dsp->put_vc1_mspel_pixels_tab[k] = mspel_funcs[k];
}