ffmpeg / libavcodec / dsputil_template.c @ 325eefa2
History | View | Annotate | Download (47.9 KB)
1 |
/*
|
---|---|
2 |
* DSP utils
|
3 |
* Copyright (c) 2000, 2001 Fabrice Bellard
|
4 |
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
|
5 |
*
|
6 |
* gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
|
7 |
*
|
8 |
* This file is part of Libav.
|
9 |
*
|
10 |
* Libav is free software; you can redistribute it and/or
|
11 |
* modify it under the terms of the GNU Lesser General Public
|
12 |
* License as published by the Free Software Foundation; either
|
13 |
* version 2.1 of the License, or (at your option) any later version.
|
14 |
*
|
15 |
* Libav is distributed in the hope that it will be useful,
|
16 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
18 |
* Lesser General Public License for more details.
|
19 |
*
|
20 |
* You should have received a copy of the GNU Lesser General Public
|
21 |
* License along with Libav; if not, write to the Free Software
|
22 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
23 |
*/
|
24 |
|
25 |
/**
|
26 |
* @file
|
27 |
* DSP utils
|
28 |
*/
|
29 |
|
30 |
#include "dsputil.h" |
31 |
|
32 |
/* draw the edges of width 'w' of an image of size width, height */
|
33 |
//FIXME check that this is ok for mpeg4 interlaced
|
34 |
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides) |
35 |
{ |
36 |
uint8_t *ptr, *last_line; |
37 |
int i;
|
38 |
|
39 |
/* left and right */
|
40 |
ptr = buf; |
41 |
for(i=0;i<height;i++) { |
42 |
memset(ptr - w, ptr[0], w);
|
43 |
memset(ptr + width, ptr[width-1], w);
|
44 |
ptr += wrap; |
45 |
} |
46 |
|
47 |
/* top and bottom + corners */
|
48 |
buf -= w; |
49 |
last_line = buf + (height - 1) * wrap;
|
50 |
if (sides & EDGE_TOP)
|
51 |
for(i = 0; i < w; i++) |
52 |
memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top |
53 |
if (sides & EDGE_BOTTOM)
|
54 |
for (i = 0; i < w; i++) |
55 |
memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom |
56 |
} |
57 |
|
58 |
/**
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
 *
 * The requested block may lie partially (or even entirely) outside the
 * w x h source image; samples outside the image are synthesized by
 * replicating the nearest edge sample.
 *
 * @param buf destination buffer
 * @param src source buffer
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
 * @param block_w width of block
 * @param block_h height of block
 * @param src_x x coordinate of the top left sample of the block in the source buffer
 * @param src_y y coordinate of the top left sample of the block in the source buffer
 * @param w width of the source buffer
 * @param h height of the source buffer
 */
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* If the block lies completely outside the image vertically or
     * horizontally, shift src so the nearest valid line/column is the
     * one that gets replicated. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* Sub-rectangle of the block (block-local coordinates) that is covered
     * by real source samples. */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    w = end_x - start_x;   /* 'w' is reused: width of the valid strip */
    src += start_y*linesize + start_x;
    buf += start_x;

    //top
    for(y=0; y<start_y; y++){
        memcpy(buf, src, w);   /* replicate the first valid line upwards */
        buf += linesize;
    }

    // copy existing part
    for(; y<end_y; y++){
        memcpy(buf, src, w);
        src += linesize;
        buf += linesize;
    }

    //bottom
    src -= linesize;           /* step back to the last valid line */
    for(; y<block_h; y++){
        memcpy(buf, src, w);   /* replicate the last valid line downwards */
        buf += linesize;
    }

    /* Second pass over the destination only: replicate the leftmost/rightmost
     * valid columns sideways. Corners are covered because the vertical pass
     * above already filled the edge rows. */
    buf -= block_h * linesize + start_x;
    while (block_h--){
        //left
        for(x=0; x<start_x; x++){
            buf[x] = buf[start_x];
        }

        //right
        for(x=end_x; x<block_w; x++){
            buf[x] = buf[end_x - 1];
        }
        buf += linesize;
    }
}
135 |
|
136 |
/* Add an 8x8 block of DCT coefficients onto an 8x8 block of pixels.
 * Stores wrap modulo 256 exactly as the unrolled original did
 * (uint8_t arithmetic). */
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block  += 8;
    }
}
152 |
|
153 |
/* Add a 4x4 block of DCT coefficients onto a 4x4 block of pixels
 * (uint8_t wrap-around semantics identical to the unrolled original). */
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block  += 4;
    }
}
165 |
|
166 |
#if 0
/* NOTE(review): this whole branch is disabled (#if 0). It is a 64-bit-at-a-
 * time variant of the PIXOP2 kernels found in the active #else branch below,
 * kept for reference only.
 * NOTE(review): it defines OPNAME ## _pixels (no "_c" suffix) while the
 * CALL_2X_PIXELS list at the bottom references OPNAME ## _pixels_c, so it
 * would not compile as-is if re-enabled — confirm before resurrecting. */

#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), AV_RN64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= AV_RN64(pixels          );\
        const uint64_t b= AV_RN64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0202020202020202ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0202020202020202ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint64_t a= AV_RN64(pixels  );\
        const uint64_t b= AV_RN64(pixels+1);\
        uint64_t l0=  (a&0x0303030303030303ULL)\
                    + (b&0x0303030303030303ULL)\
                    + 0x0101010101010101ULL;\
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        uint64_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint64_t a= AV_RN64(pixels  );\
            uint64_t b= AV_RN64(pixels+1);\
            l1=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL);\
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN64(pixels  );\
            b= AV_RN64(pixels+1);\
            l0=  (a&0x0303030303030303ULL)\
               + (b&0x0303030303030303ULL)\
               + 0x0101010101010101ULL;\
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)

/* 8-samples-at-once rounded average used by the disabled 64-bit PIXOP2 */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant
|
309 |
|
310 |
/* Pixel copy / averaging primitives for 2-, 4-, 8- and 16-pixel-wide blocks,
 * instantiated once per OP below: PIXOP2(put, op_put) generates plain stores,
 * PIXOP2(avg, op_avg) generates rounded averaging with the destination.
 * Naming convention: _x2 = horizontal half-pel, _y2 = vertical half-pel,
 * _xy2 = diagonal half-pel; _l2/_l4 average 2 or 4 sources; "no_rnd"
 * variants round down instead of to nearest.
 * All loads go through the unaligned AV_RN16/AV_RN32 macros. */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    /* full-pel copy needs no rounding, so the rnd/no_rnd variants coincide */\
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN32(&src1[i*src_stride1  ]);\
        b= AV_RN32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= AV_RN16(&src1[i*src_stride1  ]);\
        b= AV_RN16(&src2[i*src_stride2  ]);\
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
\
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= AV_RN32(&src1[i*src_stride1]);\
        b= AV_RN32(&src2[i*src_stride2]);\
        c= AV_RN32(&src3[i*src_stride3]);\
        d= AV_RN32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= AV_RN32(&src1[i*src_stride1+4]);\
        b= AV_RN32(&src2[i*src_stride2+4]);\
        c= AV_RN32(&src3[i*src_stride3+4]);\
        d= AV_RN32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i, a0, b0, a1, b1;\
        a0= pixels[0];\
        b0= pixels[1] + 2;\
        a0 += b0;\
        b0 += pixels[2];\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            a1= pixels[0];\
            b1= pixels[1];\
            a1 += b1;\
            b1 += pixels[2];\
\
            block[0]= (a1+a0)>>2; /* FIXME non put */\
            block[1]= (b1+b0)>>2;\
\
            pixels+=line_size;\
            block +=line_size;\
\
            a0= pixels[0];\
            b0= pixels[1] + 2;\
            a0 += b0;\
            b0 += pixels[2];\
\
            block[0]= (a1+a0)>>2;\
            block[1]= (b1+b0)>>2;\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
}\
\
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= AV_RN32(pixels  );\
        const uint32_t b= AV_RN32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= AV_RN32(pixels  );\
            uint32_t b= AV_RN32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= AV_RN32(pixels  );\
            b= AV_RN32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels8_c    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c   , OPNAME ## _pixels8_c         , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\

/* 4-samples-at-once rounded average (32-bit path) */
#define op_avg(a, b) a = rnd_avg32(a, b)
#endif

/* plain store */
#define op_put(a, b) a = b

/* Instantiate the whole kernel family for both operations. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)
#undef op_avg
#undef op_put

/* For plain (full-pel) copies, rounding never happens, so the no_rnd
 * versions are simple aliases. */
#define put_no_rnd_pixels8_c  put_pixels8_c
#define put_no_rnd_pixels16_c put_pixels16_c
|
688 |
|
689 |
/* Thin wrapper: non-rounding 16-wide two-source average where dst and both
 * sources share a single stride (adapts the PIXOP2-generated helper to the
 * common-stride DSP function signature). */
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
}
692 |
|
693 |
/* Thin wrapper: non-rounding 8-wide two-source average with one common
 * stride for dst and both sources. */
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
}
696 |
|
697 |
/* H.264 chroma motion compensation for 2-, 4- and 8-pixel-wide blocks.
 * (x,y) are eighth-pel offsets in [0,8); the four bilinear weights
 * A,B,C,D sum to 64, and the op_put/op_avg macros below fold in the
 * "+32 >> 6" normalization. When D == 0 the 2-D filter degenerates to a
 * 1-D filter along either the row (C == 0) or the column (C != 0),
 * selected via 'step'. */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}\
\
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
    const int A=(8-x)*(8-y);\
    const int B=(  x)*(8-y);\
    const int C=(8-x)*(  y);\
    const int D=(  x)*(  y);\
    int i;\
    \
    assert(x<8 && y<8 && x>=0 && y>=0);\
    \
    if(D){\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
            dst+= stride;\
            src+= stride;\
        }\
    }else{\
        const int E= B+C;\
        const int step= C ? stride : 1;\
        for(i=0; i<h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0]));\
            OP(dst[1], (A*src[1] + E*src[step+1]));\
            OP(dst[2], (A*src[2] + E*src[step+2]));\
            OP(dst[3], (A*src[3] + E*src[step+3]));\
            OP(dst[4], (A*src[4] + E*src[step+4]));\
            OP(dst[5], (A*src[5] + E*src[step+5]));\
            OP(dst[6], (A*src[6] + E*src[step+6]));\
            OP(dst[7], (A*src[7] + E*src[step+7]));\
            dst+= stride;\
            src+= stride;\
        }\
    }\
}

/* normalize the 6.6 fixed-point sum and, for avg, round-average with dst */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1)
#define op_put(a, b) a = (((b) + 32)>>6)

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
|
806 |
/*
 * H264_LOWPASS instantiates the H.264 six-tap (1,-5,20,20,-5,1) luma
 * interpolation filters for 2, 4, 8 and 16 wide blocks:
 *   *_h_lowpass  - horizontal filter over src rows
 *   *_v_lowpass  - vertical filter over src columns
 *   *_hv_lowpass - horizontal pass into a 16-bit tmp buffer, then a
 *                  vertical pass over tmp (OP2 rounds the double-filtered
 *                  value, see op2_put/op2_avg below)
 * OP/OP2 are per-pixel store macros (put or avg); they clip via the table
 * 'cm' declared in each function.  The 16-wide variants are composed from
 * four 8-wide calls.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}
1069 |
|
1070 |
/*
 * H264_MC instantiates the 16 quarter-pel motion-compensation functions
 * for one block size.  _mcXY_c handles the quarter-pel position (X/4, Y/4):
 * full-pel copy (mc00), pure half-pel filters (mc20/mc02/mc22), and the
 * remaining positions built by averaging (*_l2) two intermediate planes.
 * copy_block##SIZE first copies SIZE+5 rows into 'full' so the vertical
 * six-tap filter can read 2 rows above and 3 below the block.
 */
#define H264_MC(OPNAME, SIZE) \
static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}
1206 |
|
1207 |
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) |
1208 |
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
|
1209 |
#define op_put(a, b) a = cm[((b) + 16)>>5] |
1210 |
#define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1) |
1211 |
#define op2_put(a, b) a = cm[((b) + 512)>>10] |
1212 |
|
1213 |
H264_LOWPASS(put_ , op_put, op2_put) |
1214 |
H264_LOWPASS(avg_ , op_avg, op2_avg) |
1215 |
H264_MC(put_, 2)
|
1216 |
H264_MC(put_, 4)
|
1217 |
H264_MC(put_, 8)
|
1218 |
H264_MC(put_, 16)
|
1219 |
H264_MC(avg_, 4)
|
1220 |
H264_MC(avg_, 8)
|
1221 |
H264_MC(avg_, 16)
|
1222 |
|
1223 |
#undef op_avg
|
1224 |
#undef op_put
|
1225 |
#undef op2_avg
|
1226 |
#undef op2_put
|
1227 |
|
1228 |
#define put_h264_qpel8_mc00_c ff_put_pixels8x8_c
|
1229 |
#define avg_h264_qpel8_mc00_c ff_avg_pixels8x8_c
|
1230 |
#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
|
1231 |
#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
|
1232 |
|
1233 |
/* Copy an 8x8 block; also used as put_h264_qpel8_mc00_c (see alias above). */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels8_c(dst, src, stride, 8);
}
1236 |
/* Average an 8x8 block into dst; also used as avg_h264_qpel8_mc00_c. */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels8_c(dst, src, stride, 8);
}
1239 |
/* Copy a 16x16 block; also used as put_h264_qpel16_mc00_c. */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride)
{
    put_pixels16_c(dst, src, stride, 16);
}
1242 |
/* Average a 16x16 block into dst; also used as avg_h264_qpel16_mc00_c. */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride)
{
    avg_pixels16_c(dst, src, stride, 16);
}
1245 |
|
1246 |
/* Zero a single 8x8 block of DCT coefficients. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, 64 * sizeof(*block));
}
1250 |
|
1251 |
/**
|
1252 |
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
|
1253 |
*/
|
1254 |
static void clear_blocks_c(DCTELEM *blocks) |
1255 |
{ |
1256 |
memset(blocks, 0, sizeof(DCTELEM)*6*64); |
1257 |
} |