Revision 2b092f7a libavcodec/h264pred_internal.h
libavcodec/h264pred_internal.h | ||
---|---|---|
28 | 28 |
#include "mathops.h" |
29 | 29 |
#include "dsputil.h" |
30 | 30 |
|
31 |
static void pred4x4_vertical_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
32 |
const uint32_t a= ((uint32_t*)(src-stride))[0]; |
|
33 |
((uint32_t*)(src+0*stride))[0]= a; |
|
34 |
((uint32_t*)(src+1*stride))[0]= a; |
|
35 |
((uint32_t*)(src+2*stride))[0]= a; |
|
36 |
((uint32_t*)(src+3*stride))[0]= a; |
|
37 |
} |
|
38 |
|
|
39 |
static void pred4x4_horizontal_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
40 |
((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101; |
|
41 |
((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101; |
|
42 |
((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101; |
|
43 |
((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101; |
|
44 |
} |
|
45 |
|
|
46 |
static void pred4x4_dc_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
31 |
#define BIT_DEPTH 8 |
|
32 |
|
|
33 |
#define pixel uint8_t |
|
34 |
#define pixel4 uint32_t |
|
35 |
#define dctcoef DCTELEM |
|
36 |
|
|
37 |
#define INIT_CLIP uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
|
38 |
#define CLIP(a) cm[a] |
|
39 |
#define FUNC(a) a |
|
40 |
#define FUNCC(a) a ## _c |
|
41 |
#define PIXEL_SPLAT_X4(x) ((x)*0x01010101U) |
|
42 |
#define AV_WN4P AV_WN32 |
|
43 |
#define AV_WN4PA AV_WN32A |
|
44 |
|
|
45 |
static void FUNCC(pred4x4_vertical)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
46 |
pixel *src = (pixel*)p_src; |
|
47 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
48 |
const pixel4 a= ((pixel4*)(src-stride))[0]; |
|
49 |
((pixel4*)(src+0*stride))[0]= a; |
|
50 |
((pixel4*)(src+1*stride))[0]= a; |
|
51 |
((pixel4*)(src+2*stride))[0]= a; |
|
52 |
((pixel4*)(src+3*stride))[0]= a; |
|
53 |
} |
|
54 |
|
|
55 |
static void FUNCC(pred4x4_horizontal)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
56 |
pixel *src = (pixel*)p_src; |
|
57 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
58 |
((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]); |
|
59 |
((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]); |
|
60 |
((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]); |
|
61 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]); |
|
62 |
} |
|
63 |
|
|
64 |
static void FUNCC(pred4x4_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
65 |
pixel *src = (pixel*)p_src; |
|
66 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
47 | 67 |
const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] |
48 | 68 |
+ src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3; |
49 | 69 |
|
50 |
((uint32_t*)(src+0*stride))[0]=
|
|
51 |
((uint32_t*)(src+1*stride))[0]=
|
|
52 |
((uint32_t*)(src+2*stride))[0]=
|
|
53 |
((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
|
|
70 |
((pixel4*)(src+0*stride))[0]=
|
|
71 |
((pixel4*)(src+1*stride))[0]=
|
|
72 |
((pixel4*)(src+2*stride))[0]=
|
|
73 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
|
|
54 | 74 |
} |
55 | 75 |
|
56 |
static void pred4x4_left_dc_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
76 |
static void FUNCC(pred4x4_left_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
77 |
pixel *src = (pixel*)p_src; |
|
78 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
57 | 79 |
const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2; |
58 | 80 |
|
59 |
((uint32_t*)(src+0*stride))[0]=
|
|
60 |
((uint32_t*)(src+1*stride))[0]=
|
|
61 |
((uint32_t*)(src+2*stride))[0]=
|
|
62 |
((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
|
|
81 |
((pixel4*)(src+0*stride))[0]=
|
|
82 |
((pixel4*)(src+1*stride))[0]=
|
|
83 |
((pixel4*)(src+2*stride))[0]=
|
|
84 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
|
|
63 | 85 |
} |
64 | 86 |
|
65 |
static void pred4x4_top_dc_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
87 |
static void FUNCC(pred4x4_top_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
88 |
pixel *src = (pixel*)p_src; |
|
89 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
66 | 90 |
const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2; |
67 | 91 |
|
68 |
((uint32_t*)(src+0*stride))[0]=
|
|
69 |
((uint32_t*)(src+1*stride))[0]=
|
|
70 |
((uint32_t*)(src+2*stride))[0]=
|
|
71 |
((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
|
|
92 |
((pixel4*)(src+0*stride))[0]=
|
|
93 |
((pixel4*)(src+1*stride))[0]=
|
|
94 |
((pixel4*)(src+2*stride))[0]=
|
|
95 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
|
|
72 | 96 |
} |
73 | 97 |
|
74 |
static void pred4x4_128_dc_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
75 |
((uint32_t*)(src+0*stride))[0]= |
|
76 |
((uint32_t*)(src+1*stride))[0]= |
|
77 |
((uint32_t*)(src+2*stride))[0]= |
|
78 |
((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U; |
|
98 |
static void FUNCC(pred4x4_128_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
99 |
pixel *src = (pixel*)p_src; |
|
100 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
101 |
((pixel4*)(src+0*stride))[0]= |
|
102 |
((pixel4*)(src+1*stride))[0]= |
|
103 |
((pixel4*)(src+2*stride))[0]= |
|
104 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); |
|
79 | 105 |
} |
80 | 106 |
|
81 |
static void pred4x4_127_dc_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
82 |
((uint32_t*)(src+0*stride))[0]= |
|
83 |
((uint32_t*)(src+1*stride))[0]= |
|
84 |
((uint32_t*)(src+2*stride))[0]= |
|
85 |
((uint32_t*)(src+3*stride))[0]= 127U*0x01010101U; |
|
107 |
static void FUNCC(pred4x4_127_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
108 |
pixel *src = (pixel*)p_src; |
|
109 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
110 |
((pixel4*)(src+0*stride))[0]= |
|
111 |
((pixel4*)(src+1*stride))[0]= |
|
112 |
((pixel4*)(src+2*stride))[0]= |
|
113 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); |
|
86 | 114 |
} |
87 | 115 |
|
88 |
static void pred4x4_129_dc_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
89 |
((uint32_t*)(src+0*stride))[0]= |
|
90 |
((uint32_t*)(src+1*stride))[0]= |
|
91 |
((uint32_t*)(src+2*stride))[0]= |
|
92 |
((uint32_t*)(src+3*stride))[0]= 129U*0x01010101U; |
|
116 |
static void FUNCC(pred4x4_129_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
117 |
pixel *src = (pixel*)p_src; |
|
118 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
119 |
((pixel4*)(src+0*stride))[0]= |
|
120 |
((pixel4*)(src+1*stride))[0]= |
|
121 |
((pixel4*)(src+2*stride))[0]= |
|
122 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); |
|
93 | 123 |
} |
94 | 124 |
|
95 | 125 |
|
... | ... | |
117 | 147 |
const int av_unused t2= src[ 2-1*stride];\ |
118 | 148 |
const int av_unused t3= src[ 3-1*stride];\ |
119 | 149 |
|
120 |
static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
150 |
static void FUNCC(pred4x4_vertical_vp8)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
151 |
pixel *src = (pixel*)p_src; |
|
152 |
const pixel *topright = (const pixel*)p_topright; |
|
153 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
121 | 154 |
const int lt= src[-1-1*stride]; |
122 | 155 |
LOAD_TOP_EDGE |
123 | 156 |
LOAD_TOP_RIGHT_EDGE |
124 |
uint32_t v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
|
|
157 |
pixel4 v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
|
|
125 | 158 |
(t0 + 2*t1 + t2 + 2) >> 2, |
126 | 159 |
(t1 + 2*t2 + t3 + 2) >> 2, |
127 | 160 |
(t2 + 2*t3 + t4 + 2) >> 2); |
128 | 161 |
|
129 |
AV_WN32A(src+0*stride, v);
|
|
130 |
AV_WN32A(src+1*stride, v);
|
|
131 |
AV_WN32A(src+2*stride, v);
|
|
132 |
AV_WN32A(src+3*stride, v);
|
|
162 |
AV_WN4PA(src+0*stride, v);
|
|
163 |
AV_WN4PA(src+1*stride, v);
|
|
164 |
AV_WN4PA(src+2*stride, v);
|
|
165 |
AV_WN4PA(src+3*stride, v);
|
|
133 | 166 |
} |
134 | 167 |
|
135 |
static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
168 |
static void FUNCC(pred4x4_horizontal_vp8)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
169 |
pixel *src = (pixel*)p_src; |
|
170 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
136 | 171 |
const int lt= src[-1-1*stride]; |
137 | 172 |
LOAD_LEFT_EDGE |
138 | 173 |
|
139 |
AV_WN32A(src+0*stride, ((lt + 2*l0 + l1 + 2) >> 2)*0x01010101);
|
|
140 |
AV_WN32A(src+1*stride, ((l0 + 2*l1 + l2 + 2) >> 2)*0x01010101);
|
|
141 |
AV_WN32A(src+2*stride, ((l1 + 2*l2 + l3 + 2) >> 2)*0x01010101);
|
|
142 |
AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
|
|
174 |
AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4((lt + 2*l0 + l1 + 2) >> 2));
|
|
175 |
AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4((l0 + 2*l1 + l2 + 2) >> 2));
|
|
176 |
AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4((l1 + 2*l2 + l3 + 2) >> 2));
|
|
177 |
AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4((l2 + 2*l3 + l3 + 2) >> 2));
|
|
143 | 178 |
} |
144 | 179 |
|
145 |
static void pred4x4_down_right_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
180 |
static void FUNCC(pred4x4_down_right)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
181 |
pixel *src = (pixel*)p_src; |
|
182 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
146 | 183 |
const int lt= src[-1-1*stride]; |
147 | 184 |
LOAD_TOP_EDGE |
148 | 185 |
LOAD_LEFT_EDGE |
... | ... | |
165 | 202 |
src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2; |
166 | 203 |
} |
167 | 204 |
|
168 |
static void pred4x4_down_left_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
205 |
static void FUNCC(pred4x4_down_left)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
206 |
pixel *src = (pixel*)p_src; |
|
207 |
const pixel *topright = (const pixel*)p_topright; |
|
208 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
169 | 209 |
LOAD_TOP_EDGE |
170 | 210 |
LOAD_TOP_RIGHT_EDGE |
171 | 211 |
// LOAD_LEFT_EDGE |
... | ... | |
188 | 228 |
src[3+3*stride]=(t6 + 3*t7 + 2)>>2; |
189 | 229 |
} |
190 | 230 |
|
191 |
static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
231 |
static void FUNCC(pred4x4_down_left_svq3)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
232 |
pixel *src = (pixel*)p_src; |
|
233 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
192 | 234 |
LOAD_TOP_EDGE |
193 | 235 |
LOAD_LEFT_EDGE |
194 | 236 |
const av_unused int unu0= t0; |
... | ... | |
212 | 254 |
src[3+3*stride]=(l3 + t3)>>1; |
213 | 255 |
} |
214 | 256 |
|
215 |
static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
257 |
static void FUNCC(pred4x4_down_left_rv40)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
258 |
pixel *src = (pixel*)p_src; |
|
259 |
const pixel *topright = (const pixel*)p_topright; |
|
260 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
216 | 261 |
LOAD_TOP_EDGE |
217 | 262 |
LOAD_TOP_RIGHT_EDGE |
218 | 263 |
LOAD_LEFT_EDGE |
... | ... | |
236 | 281 |
src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2; |
237 | 282 |
} |
238 | 283 |
|
239 |
static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
284 |
static void FUNCC(pred4x4_down_left_rv40_nodown)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
285 |
pixel *src = (pixel*)p_src; |
|
286 |
const pixel *topright = (const pixel*)p_topright; |
|
287 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
240 | 288 |
LOAD_TOP_EDGE |
241 | 289 |
LOAD_TOP_RIGHT_EDGE |
242 | 290 |
LOAD_LEFT_EDGE |
... | ... | |
259 | 307 |
src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2; |
260 | 308 |
} |
261 | 309 |
|
262 |
static void pred4x4_vertical_right_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
310 |
static void FUNCC(pred4x4_vertical_right)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
311 |
pixel *src = (pixel*)p_src; |
|
312 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
263 | 313 |
const int lt= src[-1-1*stride]; |
264 | 314 |
LOAD_TOP_EDGE |
265 | 315 |
LOAD_LEFT_EDGE |
... | ... | |
282 | 332 |
src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2; |
283 | 333 |
} |
284 | 334 |
|
285 |
static void pred4x4_vertical_left_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
335 |
static void FUNCC(pred4x4_vertical_left)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
336 |
pixel *src = (pixel*)p_src; |
|
337 |
const pixel *topright = (const pixel*)p_topright; |
|
338 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
286 | 339 |
LOAD_TOP_EDGE |
287 | 340 |
LOAD_TOP_RIGHT_EDGE |
288 | 341 |
|
... | ... | |
304 | 357 |
src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; |
305 | 358 |
} |
306 | 359 |
|
307 |
static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride,
|
|
360 |
static void FUNCC(pred4x4_vertical_left_rv40_internal)(uint8_t *p_src, const uint8_t *p_topright, int p_stride,
|
|
308 | 361 |
const int l0, const int l1, const int l2, const int l3, const int l4){ |
362 |
pixel *src = (pixel*)p_src; |
|
363 |
const pixel *topright = (const pixel*)p_topright; |
|
364 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
309 | 365 |
LOAD_TOP_EDGE |
310 | 366 |
LOAD_TOP_RIGHT_EDGE |
311 | 367 |
|
... | ... | |
327 | 383 |
src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2; |
328 | 384 |
} |
329 | 385 |
|
330 |
static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
386 |
static void FUNCC(pred4x4_vertical_left_rv40)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
387 |
pixel *src = (pixel*)p_src; |
|
388 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
331 | 389 |
LOAD_LEFT_EDGE |
332 | 390 |
LOAD_DOWN_LEFT_EDGE |
333 | 391 |
|
334 |
pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
|
|
392 |
FUNCC(pred4x4_vertical_left_rv40_internal)(p_src, topright, p_stride, l0, l1, l2, l3, l4);
|
|
335 | 393 |
} |
336 | 394 |
|
337 |
static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
395 |
static void FUNCC(pred4x4_vertical_left_rv40_nodown)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
396 |
pixel *src = (pixel*)p_src; |
|
397 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
338 | 398 |
LOAD_LEFT_EDGE |
339 | 399 |
|
340 |
pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
|
|
400 |
FUNCC(pred4x4_vertical_left_rv40_internal)(p_src, topright, p_stride, l0, l1, l2, l3, l3);
|
|
341 | 401 |
} |
342 | 402 |
|
343 |
static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
403 |
static void FUNCC(pred4x4_vertical_left_vp8)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
404 |
pixel *src = (pixel*)p_src; |
|
405 |
const pixel *topright = (const pixel*)p_topright; |
|
406 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
344 | 407 |
LOAD_TOP_EDGE |
345 | 408 |
LOAD_TOP_RIGHT_EDGE |
346 | 409 |
|
... | ... | |
362 | 425 |
src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2; |
363 | 426 |
} |
364 | 427 |
|
365 |
static void pred4x4_horizontal_up_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
428 |
static void FUNCC(pred4x4_horizontal_up)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
429 |
pixel *src = (pixel*)p_src; |
|
430 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
366 | 431 |
LOAD_LEFT_EDGE |
367 | 432 |
|
368 | 433 |
src[0+0*stride]=(l0 + l1 + 1)>>1; |
... | ... | |
383 | 448 |
src[3+3*stride]=l3; |
384 | 449 |
} |
385 | 450 |
|
386 |
static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
451 |
static void FUNCC(pred4x4_horizontal_up_rv40)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
452 |
pixel *src = (pixel*)p_src; |
|
453 |
const pixel *topright = (const pixel*)p_topright; |
|
454 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
387 | 455 |
LOAD_LEFT_EDGE |
388 | 456 |
LOAD_DOWN_LEFT_EDGE |
389 | 457 |
LOAD_TOP_EDGE |
... | ... | |
407 | 475 |
src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2; |
408 | 476 |
} |
409 | 477 |
|
410 |
static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
478 |
static void FUNCC(pred4x4_horizontal_up_rv40_nodown)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){ |
|
479 |
pixel *src = (pixel*)p_src; |
|
480 |
const pixel *topright = (const pixel*)p_topright; |
|
481 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
411 | 482 |
LOAD_LEFT_EDGE |
412 | 483 |
LOAD_TOP_EDGE |
413 | 484 |
LOAD_TOP_RIGHT_EDGE |
... | ... | |
430 | 501 |
src[3+3*stride]=l3; |
431 | 502 |
} |
432 | 503 |
|
433 |
static void pred4x4_horizontal_down_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
504 |
static void FUNCC(pred4x4_horizontal_down)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
505 |
pixel *src = (pixel*)p_src; |
|
506 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
434 | 507 |
const int lt= src[-1-1*stride]; |
435 | 508 |
LOAD_TOP_EDGE |
436 | 509 |
LOAD_LEFT_EDGE |
... | ... | |
453 | 526 |
src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2; |
454 | 527 |
} |
455 | 528 |
|
456 |
static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){ |
|
529 |
static void FUNCC(pred4x4_tm_vp8)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
|
530 |
pixel *src = (pixel*)p_src; |
|
531 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
457 | 532 |
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; |
458 |
uint8_t *top = src-stride;
|
|
533 |
pixel *top = src-stride;
|
|
459 | 534 |
int y; |
460 | 535 |
|
461 | 536 |
for (y = 0; y < 4; y++) { |
... | ... | |
468 | 543 |
} |
469 | 544 |
} |
470 | 545 |
|
471 |
static void pred16x16_vertical_c(uint8_t *src, int stride){
|
|
546 |
static void FUNCC(pred16x16_vertical)(uint8_t *p_src, int p_stride){
|
|
472 | 547 |
int i; |
473 |
const uint32_t a= ((uint32_t*)(src-stride))[0]; |
|
474 |
const uint32_t b= ((uint32_t*)(src-stride))[1]; |
|
475 |
const uint32_t c= ((uint32_t*)(src-stride))[2]; |
|
476 |
const uint32_t d= ((uint32_t*)(src-stride))[3]; |
|
548 |
pixel *src = (pixel*)p_src; |
|
549 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
550 |
const pixel4 a = ((pixel4*)(src-stride))[0]; |
|
551 |
const pixel4 b = ((pixel4*)(src-stride))[1]; |
|
552 |
const pixel4 c = ((pixel4*)(src-stride))[2]; |
|
553 |
const pixel4 d = ((pixel4*)(src-stride))[3]; |
|
477 | 554 |
|
478 | 555 |
for(i=0; i<16; i++){ |
479 |
((uint32_t*)(src+i*stride))[0]= a;
|
|
480 |
((uint32_t*)(src+i*stride))[1]= b;
|
|
481 |
((uint32_t*)(src+i*stride))[2]= c;
|
|
482 |
((uint32_t*)(src+i*stride))[3]= d;
|
|
556 |
((pixel4*)(src+i*stride))[0] = a;
|
|
557 |
((pixel4*)(src+i*stride))[1] = b;
|
|
558 |
((pixel4*)(src+i*stride))[2] = c;
|
|
559 |
((pixel4*)(src+i*stride))[3] = d;
|
|
483 | 560 |
} |
484 | 561 |
} |
485 | 562 |
|
486 |
static void pred16x16_horizontal_c(uint8_t *src, int stride){
|
|
563 |
static void FUNCC(pred16x16_horizontal)(uint8_t *p_src, int stride){
|
|
487 | 564 |
int i; |
565 |
pixel *src = (pixel*)p_src; |
|
566 |
stride >>= sizeof(pixel)-1; |
|
488 | 567 |
|
489 | 568 |
for(i=0; i<16; i++){ |
490 |
((uint32_t*)(src+i*stride))[0]=
|
|
491 |
((uint32_t*)(src+i*stride))[1]=
|
|
492 |
((uint32_t*)(src+i*stride))[2]=
|
|
493 |
((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
|
|
569 |
((pixel4*)(src+i*stride))[0] =
|
|
570 |
((pixel4*)(src+i*stride))[1] =
|
|
571 |
((pixel4*)(src+i*stride))[2] =
|
|
572 |
((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
|
|
494 | 573 |
} |
495 | 574 |
} |
496 | 575 |
|
497 |
static void pred16x16_dc_c(uint8_t *src, int stride){ |
|
576 |
#define PREDICT_16x16_DC(v)\ |
|
577 |
for(i=0; i<16; i++){\ |
|
578 |
AV_WN4P(src+ 0, v);\ |
|
579 |
AV_WN4P(src+ 4, v);\ |
|
580 |
AV_WN4P(src+ 8, v);\ |
|
581 |
AV_WN4P(src+12, v);\ |
|
582 |
src += stride;\ |
|
583 |
} |
|
584 |
|
|
585 |
static void FUNCC(pred16x16_dc)(uint8_t *p_src, int stride){ |
|
498 | 586 |
int i, dc=0; |
587 |
pixel *src = (pixel*)p_src; |
|
588 |
pixel4 dcsplat; |
|
589 |
stride >>= sizeof(pixel)-1; |
|
499 | 590 |
|
500 | 591 |
for(i=0;i<16; i++){ |
501 | 592 |
dc+= src[-1+i*stride]; |
... | ... | |
505 | 596 |
dc+= src[i-stride]; |
506 | 597 |
} |
507 | 598 |
|
508 |
dc= 0x01010101*((dc + 16)>>5); |
|
509 |
|
|
510 |
for(i=0; i<16; i++){ |
|
511 |
((uint32_t*)(src+i*stride))[0]= |
|
512 |
((uint32_t*)(src+i*stride))[1]= |
|
513 |
((uint32_t*)(src+i*stride))[2]= |
|
514 |
((uint32_t*)(src+i*stride))[3]= dc; |
|
515 |
} |
|
599 |
dcsplat = PIXEL_SPLAT_X4((dc+16)>>5); |
|
600 |
PREDICT_16x16_DC(dcsplat); |
|
516 | 601 |
} |
517 | 602 |
|
518 |
static void pred16x16_left_dc_c(uint8_t *src, int stride){
|
|
603 |
static void FUNCC(pred16x16_left_dc)(uint8_t *p_src, int stride){
|
|
519 | 604 |
int i, dc=0; |
605 |
pixel *src = (pixel*)p_src; |
|
606 |
pixel4 dcsplat; |
|
607 |
stride >>= sizeof(pixel)-1; |
|
520 | 608 |
|
521 | 609 |
for(i=0;i<16; i++){ |
522 | 610 |
dc+= src[-1+i*stride]; |
523 | 611 |
} |
524 | 612 |
|
525 |
dc= 0x01010101*((dc + 8)>>4); |
|
526 |
|
|
527 |
for(i=0; i<16; i++){ |
|
528 |
((uint32_t*)(src+i*stride))[0]= |
|
529 |
((uint32_t*)(src+i*stride))[1]= |
|
530 |
((uint32_t*)(src+i*stride))[2]= |
|
531 |
((uint32_t*)(src+i*stride))[3]= dc; |
|
532 |
} |
|
613 |
dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); |
|
614 |
PREDICT_16x16_DC(dcsplat); |
|
533 | 615 |
} |
534 | 616 |
|
535 |
static void pred16x16_top_dc_c(uint8_t *src, int stride){
|
|
617 |
static void FUNCC(pred16x16_top_dc)(uint8_t *p_src, int stride){
|
|
536 | 618 |
int i, dc=0; |
619 |
pixel *src = (pixel*)p_src; |
|
620 |
pixel4 dcsplat; |
|
621 |
stride >>= sizeof(pixel)-1; |
|
537 | 622 |
|
538 | 623 |
for(i=0;i<16; i++){ |
539 | 624 |
dc+= src[i-stride]; |
540 | 625 |
} |
541 |
dc= 0x01010101*((dc + 8)>>4); |
|
542 | 626 |
|
543 |
for(i=0; i<16; i++){ |
|
544 |
((uint32_t*)(src+i*stride))[0]= |
|
545 |
((uint32_t*)(src+i*stride))[1]= |
|
546 |
((uint32_t*)(src+i*stride))[2]= |
|
547 |
((uint32_t*)(src+i*stride))[3]= dc; |
|
548 |
} |
|
627 |
dcsplat = PIXEL_SPLAT_X4((dc+8)>>4); |
|
628 |
PREDICT_16x16_DC(dcsplat); |
|
549 | 629 |
} |
550 | 630 |
|
551 |
static void pred16x16_128_dc_c(uint8_t *src, int stride){ |
|
552 |
int i; |
|
553 |
|
|
554 |
for(i=0; i<16; i++){ |
|
555 |
((uint32_t*)(src+i*stride))[0]= |
|
556 |
((uint32_t*)(src+i*stride))[1]= |
|
557 |
((uint32_t*)(src+i*stride))[2]= |
|
558 |
((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U; |
|
559 |
} |
|
631 |
#define PRED16x16_X(n, v) \ |
|
632 |
static void FUNCC(pred16x16_##n##_dc)(uint8_t *p_src, int stride){\ |
|
633 |
int i;\ |
|
634 |
pixel *src = (pixel*)p_src;\ |
|
635 |
stride >>= sizeof(pixel)-1;\ |
|
636 |
PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\ |
|
560 | 637 |
} |
561 | 638 |
|
562 |
static void pred16x16_127_dc_c(uint8_t *src, int stride){ |
|
563 |
int i; |
|
564 |
|
|
565 |
for(i=0; i<16; i++){ |
|
566 |
((uint32_t*)(src+i*stride))[0]= |
|
567 |
((uint32_t*)(src+i*stride))[1]= |
|
568 |
((uint32_t*)(src+i*stride))[2]= |
|
569 |
((uint32_t*)(src+i*stride))[3]= 0x01010101U*127U; |
|
570 |
} |
|
571 |
} |
|
572 |
|
|
573 |
static void pred16x16_129_dc_c(uint8_t *src, int stride){ |
|
574 |
int i; |
|
575 |
|
|
576 |
for(i=0; i<16; i++){ |
|
577 |
((uint32_t*)(src+i*stride))[0]= |
|
578 |
((uint32_t*)(src+i*stride))[1]= |
|
579 |
((uint32_t*)(src+i*stride))[2]= |
|
580 |
((uint32_t*)(src+i*stride))[3]= 0x01010101U*129U; |
|
581 |
} |
|
582 |
} |
|
639 |
PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1); |
|
640 |
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0); |
|
641 |
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1); |
|
583 | 642 |
|
584 |
static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3, const int rv40){
|
|
643 |
static inline void FUNCC(pred16x16_plane_compat)(uint8_t *p_src, int p_stride, const int svq3, const int rv40){
|
|
585 | 644 |
int i, j, k; |
586 | 645 |
int a; |
587 |
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
|
588 |
const uint8_t * const src0 = src+7-stride; |
|
589 |
const uint8_t *src1 = src+8*stride-1; |
|
590 |
const uint8_t *src2 = src1-2*stride; // == src+6*stride-1; |
|
646 |
INIT_CLIP |
|
647 |
pixel *src = (pixel*)p_src; |
|
648 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
649 |
const pixel * const src0 = src +7-stride; |
|
650 |
const pixel * src1 = src +8*stride-1; |
|
651 |
const pixel * src2 = src1-2*stride; // == src+6*stride-1; |
|
591 | 652 |
int H = src0[1] - src0[-1]; |
592 | 653 |
int V = src1[0] - src2[ 0]; |
593 | 654 |
for(k=2; k<=8; ++k) { |
... | ... | |
614 | 675 |
int b = a; |
615 | 676 |
a += V; |
616 | 677 |
for(i=-16; i<0; i+=4) { |
617 |
src[16+i] = cm[ (b ) >> 5 ];
|
|
618 |
src[17+i] = cm[ (b+ H) >> 5 ];
|
|
619 |
src[18+i] = cm[ (b+2*H) >> 5 ];
|
|
620 |
src[19+i] = cm[ (b+3*H) >> 5 ];
|
|
678 |
src[16+i] = CLIP((b ) >> 5);
|
|
679 |
src[17+i] = CLIP((b+ H) >> 5);
|
|
680 |
src[18+i] = CLIP((b+2*H) >> 5);
|
|
681 |
src[19+i] = CLIP((b+3*H) >> 5);
|
|
621 | 682 |
b += 4*H; |
622 | 683 |
} |
623 | 684 |
src += stride; |
624 | 685 |
} |
625 | 686 |
} |
626 | 687 |
|
627 |
static void pred16x16_plane_c(uint8_t *src, int stride){
|
|
628 |
pred16x16_plane_compat_c(src, stride, 0, 0);
|
|
688 |
static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
|
|
689 |
FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
|
|
629 | 690 |
} |
630 | 691 |
|
631 |
static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
|
|
632 |
pred16x16_plane_compat_c(src, stride, 1, 0);
|
|
692 |
static void FUNCC(pred16x16_plane_svq3)(uint8_t *src, int stride){
|
|
693 |
FUNCC(pred16x16_plane_compat)(src, stride, 1, 0);
|
|
633 | 694 |
} |
634 | 695 |
|
635 |
static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
|
|
636 |
pred16x16_plane_compat_c(src, stride, 0, 1);
|
|
696 |
static void FUNCC(pred16x16_plane_rv40)(uint8_t *src, int stride){
|
|
697 |
FUNCC(pred16x16_plane_compat)(src, stride, 0, 1);
|
|
637 | 698 |
} |
638 | 699 |
|
639 |
static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
|
|
700 |
static void FUNCC(pred16x16_tm_vp8)(uint8_t *src, int stride){
|
|
640 | 701 |
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; |
641 | 702 |
uint8_t *top = src-stride; |
642 | 703 |
int y; |
... | ... | |
663 | 724 |
} |
664 | 725 |
} |
665 | 726 |
|
666 |
static void pred8x8_vertical_c(uint8_t *src, int stride){
|
|
727 |
static void FUNCC(pred8x8_vertical)(uint8_t *p_src, int p_stride){
|
|
667 | 728 |
int i; |
668 |
const uint32_t a= ((uint32_t*)(src-stride))[0]; |
|
669 |
const uint32_t b= ((uint32_t*)(src-stride))[1]; |
|
729 |
pixel *src = (pixel*)p_src; |
|
730 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
731 |
const pixel4 a= ((pixel4*)(src-stride))[0]; |
|
732 |
const pixel4 b= ((pixel4*)(src-stride))[1]; |
|
670 | 733 |
|
671 | 734 |
for(i=0; i<8; i++){ |
672 |
((uint32_t*)(src+i*stride))[0]= a;
|
|
673 |
((uint32_t*)(src+i*stride))[1]= b;
|
|
735 |
((pixel4*)(src+i*stride))[0]= a;
|
|
736 |
((pixel4*)(src+i*stride))[1]= b;
|
|
674 | 737 |
} |
675 | 738 |
} |
676 | 739 |
|
677 |
static void pred8x8_horizontal_c(uint8_t *src, int stride){
|
|
740 |
static void FUNCC(pred8x8_horizontal)(uint8_t *p_src, int stride){
|
|
678 | 741 |
int i; |
742 |
pixel *src = (pixel*)p_src; |
|
743 |
stride >>= sizeof(pixel)-1; |
|
679 | 744 |
|
680 | 745 |
for(i=0; i<8; i++){ |
681 |
((uint32_t*)(src+i*stride))[0]=
|
|
682 |
((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
|
|
746 |
((pixel4*)(src+i*stride))[0]=
|
|
747 |
((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
|
|
683 | 748 |
} |
684 | 749 |
} |
685 | 750 |
|
686 |
static void pred8x8_128_dc_c(uint8_t *src, int stride){ |
|
687 |
int i; |
|
688 |
|
|
689 |
for(i=0; i<8; i++){ |
|
690 |
((uint32_t*)(src+i*stride))[0]= |
|
691 |
((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U; |
|
692 |
} |
|
751 |
#define PRED8x8_X(n, v)\ |
|
752 |
static void FUNCC(pred8x8_##n##_dc)(uint8_t *p_src, int stride){\ |
|
753 |
int i;\ |
|
754 |
pixel *src = (pixel*)p_src;\ |
|
755 |
stride >>= sizeof(pixel)-1;\ |
|
756 |
for(i=0; i<8; i++){\ |
|
757 |
((pixel4*)(src+i*stride))[0]=\ |
|
758 |
((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\ |
|
759 |
}\ |
|
693 | 760 |
} |
694 | 761 |
|
695 |
static void pred8x8_127_dc_c(uint8_t *src, int stride){ |
|
696 |
int i; |
|
762 |
PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); |
|
763 |
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); |
|
764 |
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); |
|
697 | 765 |
|
698 |
for(i=0; i<8; i++){ |
|
699 |
((uint32_t*)(src+i*stride))[0]= |
|
700 |
((uint32_t*)(src+i*stride))[1]= 0x01010101U*127U; |
|
701 |
} |
|
702 |
} |
|
703 |
static void pred8x8_129_dc_c(uint8_t *src, int stride){ |
|
704 |
int i; |
|
705 |
|
|
706 |
for(i=0; i<8; i++){ |
|
707 |
((uint32_t*)(src+i*stride))[0]= |
|
708 |
((uint32_t*)(src+i*stride))[1]= 0x01010101U*129U; |
|
709 |
} |
|
710 |
} |
|
711 |
|
|
712 |
static void pred8x8_left_dc_c(uint8_t *src, int stride){ |
|
766 |
static void FUNCC(pred8x8_left_dc)(uint8_t *p_src, int stride){ |
|
713 | 767 |
int i; |
714 | 768 |
int dc0, dc2; |
769 |
pixel4 dc0splat, dc2splat; |
|
770 |
pixel *src = (pixel*)p_src; |
|
771 |
stride >>= sizeof(pixel)-1; |
|
715 | 772 |
|
716 | 773 |
dc0=dc2=0; |
717 | 774 |
for(i=0;i<4; i++){ |
718 | 775 |
dc0+= src[-1+i*stride]; |
719 | 776 |
dc2+= src[-1+(i+4)*stride]; |
720 | 777 |
} |
721 |
dc0= 0x01010101*((dc0 + 2)>>2);
|
|
722 |
dc2= 0x01010101*((dc2 + 2)>>2);
|
|
778 |
dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
|
|
779 |
dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
|
|
723 | 780 |
|
724 | 781 |
for(i=0; i<4; i++){ |
725 |
((uint32_t*)(src+i*stride))[0]=
|
|
726 |
((uint32_t*)(src+i*stride))[1]= dc0;
|
|
782 |
((pixel4*)(src+i*stride))[0]=
|
|
783 |
((pixel4*)(src+i*stride))[1]= dc0splat;
|
|
727 | 784 |
} |
728 | 785 |
for(i=4; i<8; i++){ |
729 |
((uint32_t*)(src+i*stride))[0]=
|
|
730 |
((uint32_t*)(src+i*stride))[1]= dc2;
|
|
786 |
((pixel4*)(src+i*stride))[0]=
|
|
787 |
((pixel4*)(src+i*stride))[1]= dc2splat;
|
|
731 | 788 |
} |
732 | 789 |
} |
733 | 790 |
|
734 |
static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
|
|
791 |
static void FUNCC(pred8x8_left_dc_rv40)(uint8_t *p_src, int stride){
|
|
735 | 792 |
int i; |
736 | 793 |
int dc0; |
794 |
pixel4 dc0splat; |
|
795 |
pixel *src = (pixel*)p_src; |
|
796 |
stride >>= sizeof(pixel)-1; |
|
737 | 797 |
|
738 | 798 |
dc0=0; |
739 | 799 |
for(i=0;i<8; i++) |
740 | 800 |
dc0+= src[-1+i*stride]; |
741 |
dc0= 0x01010101*((dc0 + 4)>>3);
|
|
801 |
dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
|
|
742 | 802 |
|
743 | 803 |
for(i=0; i<8; i++){ |
744 |
((uint32_t*)(src+i*stride))[0]=
|
|
745 |
((uint32_t*)(src+i*stride))[1]= dc0;
|
|
804 |
((pixel4*)(src+i*stride))[0]=
|
|
805 |
((pixel4*)(src+i*stride))[1]= dc0splat;
|
|
746 | 806 |
} |
747 | 807 |
} |
748 | 808 |
|
749 |
static void pred8x8_top_dc_c(uint8_t *src, int stride){
|
|
809 |
static void FUNCC(pred8x8_top_dc)(uint8_t *p_src, int stride){
|
|
750 | 810 |
int i; |
751 | 811 |
int dc0, dc1; |
812 |
pixel4 dc0splat, dc1splat; |
|
813 |
pixel *src = (pixel*)p_src; |
|
814 |
stride >>= sizeof(pixel)-1; |
|
752 | 815 |
|
753 | 816 |
dc0=dc1=0; |
754 | 817 |
for(i=0;i<4; i++){ |
755 | 818 |
dc0+= src[i-stride]; |
756 | 819 |
dc1+= src[4+i-stride]; |
757 | 820 |
} |
758 |
dc0= 0x01010101*((dc0 + 2)>>2);
|
|
759 |
dc1= 0x01010101*((dc1 + 2)>>2);
|
|
821 |
dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
|
|
822 |
dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
|
|
760 | 823 |
|
761 | 824 |
for(i=0; i<4; i++){ |
762 |
((uint32_t*)(src+i*stride))[0]= dc0;
|
|
763 |
((uint32_t*)(src+i*stride))[1]= dc1;
|
|
825 |
((pixel4*)(src+i*stride))[0]= dc0splat;
|
|
826 |
((pixel4*)(src+i*stride))[1]= dc1splat;
|
|
764 | 827 |
} |
765 | 828 |
for(i=4; i<8; i++){ |
766 |
((uint32_t*)(src+i*stride))[0]= dc0;
|
|
767 |
((uint32_t*)(src+i*stride))[1]= dc1;
|
|
829 |
((pixel4*)(src+i*stride))[0]= dc0splat;
|
|
830 |
((pixel4*)(src+i*stride))[1]= dc1splat;
|
|
768 | 831 |
} |
769 | 832 |
} |
770 | 833 |
|
771 |
static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
|
|
834 |
static void FUNCC(pred8x8_top_dc_rv40)(uint8_t *p_src, int stride){
|
|
772 | 835 |
int i; |
773 | 836 |
int dc0; |
837 |
pixel4 dc0splat; |
|
838 |
pixel *src = (pixel*)p_src; |
|
839 |
stride >>= sizeof(pixel)-1; |
|
774 | 840 |
|
775 | 841 |
dc0=0; |
776 | 842 |
for(i=0;i<8; i++) |
777 | 843 |
dc0+= src[i-stride]; |
778 |
dc0= 0x01010101*((dc0 + 4)>>3);
|
|
844 |
dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
|
|
779 | 845 |
|
780 | 846 |
for(i=0; i<8; i++){ |
781 |
((uint32_t*)(src+i*stride))[0]=
|
|
782 |
((uint32_t*)(src+i*stride))[1]= dc0;
|
|
847 |
((pixel4*)(src+i*stride))[0]=
|
|
848 |
((pixel4*)(src+i*stride))[1]= dc0splat;
|
|
783 | 849 |
} |
784 | 850 |
} |
785 | 851 |
|
786 | 852 |
|
787 |
static void pred8x8_dc_c(uint8_t *src, int stride){
|
|
853 |
static void FUNCC(pred8x8_dc)(uint8_t *p_src, int stride){
|
|
788 | 854 |
int i; |
789 |
int dc0, dc1, dc2, dc3; |
|
855 |
int dc0, dc1, dc2; |
|
856 |
pixel4 dc0splat, dc1splat, dc2splat, dc3splat; |
|
857 |
pixel *src = (pixel*)p_src; |
|
858 |
stride >>= sizeof(pixel)-1; |
|
790 | 859 |
|
791 | 860 |
dc0=dc1=dc2=0; |
792 | 861 |
for(i=0;i<4; i++){ |
... | ... | |
794 | 863 |
dc1+= src[4+i-stride]; |
795 | 864 |
dc2+= src[-1+(i+4)*stride]; |
796 | 865 |
} |
797 |
dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
|
|
798 |
dc0= 0x01010101*((dc0 + 4)>>3);
|
|
799 |
dc1= 0x01010101*((dc1 + 2)>>2);
|
|
800 |
dc2= 0x01010101*((dc2 + 2)>>2);
|
|
866 |
dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
|
|
867 |
dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
|
|
868 |
dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
|
|
869 |
dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
|
|
801 | 870 |
|
802 | 871 |
for(i=0; i<4; i++){ |
803 |
((uint32_t*)(src+i*stride))[0]= dc0;
|
|
804 |
((uint32_t*)(src+i*stride))[1]= dc1;
|
|
872 |
((pixel4*)(src+i*stride))[0]= dc0splat;
|
|
873 |
((pixel4*)(src+i*stride))[1]= dc1splat;
|
|
805 | 874 |
} |
806 | 875 |
for(i=4; i<8; i++){ |
807 |
((uint32_t*)(src+i*stride))[0]= dc2;
|
|
808 |
((uint32_t*)(src+i*stride))[1]= dc3;
|
|
876 |
((pixel4*)(src+i*stride))[0]= dc2splat;
|
|
877 |
((pixel4*)(src+i*stride))[1]= dc3splat;
|
|
809 | 878 |
} |
810 | 879 |
} |
811 | 880 |
|
812 | 881 |
//the following 4 functions should not be optimized! |
813 |
static void pred8x8_mad_cow_dc_l0t(uint8_t *src, int stride){
|
|
814 |
pred8x8_top_dc_c(src, stride);
|
|
815 |
pred4x4_dc_c(src, NULL, stride);
|
|
882 |
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
|
|
883 |
FUNCC(pred8x8_top_dc)(src, stride);
|
|
884 |
FUNCC(pred4x4_dc)(src, NULL, stride);
|
|
816 | 885 |
} |
817 | 886 |
|
818 |
static void pred8x8_mad_cow_dc_0lt(uint8_t *src, int stride){
|
|
819 |
pred8x8_dc_c(src, stride);
|
|
820 |
pred4x4_top_dc_c(src, NULL, stride);
|
|
887 |
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
|
|
888 |
FUNCC(pred8x8_dc)(src, stride);
|
|
889 |
FUNCC(pred4x4_top_dc)(src, NULL, stride);
|
|
821 | 890 |
} |
822 | 891 |
|
823 |
static void pred8x8_mad_cow_dc_l00(uint8_t *src, int stride){
|
|
824 |
pred8x8_left_dc_c(src, stride);
|
|
825 |
pred4x4_128_dc_c(src + 4*stride , NULL, stride);
|
|
826 |
pred4x4_128_dc_c(src + 4*stride + 4, NULL, stride);
|
|
892 |
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
|
|
893 |
FUNCC(pred8x8_left_dc)(src, stride);
|
|
894 |
FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
|
|
895 |
FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
|
|
827 | 896 |
} |
828 | 897 |
|
829 |
static void pred8x8_mad_cow_dc_0l0(uint8_t *src, int stride){
|
|
830 |
pred8x8_left_dc_c(src, stride);
|
|
831 |
pred4x4_128_dc_c(src , NULL, stride);
|
|
832 |
pred4x4_128_dc_c(src + 4, NULL, stride);
|
|
898 |
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
|
|
899 |
FUNCC(pred8x8_left_dc)(src, stride);
|
|
900 |
FUNCC(pred4x4_128_dc)(src , NULL, stride);
|
|
901 |
FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
|
|
833 | 902 |
} |
834 | 903 |
|
835 |
|
|
836 |
static void pred8x8_dc_rv40_c(uint8_t *src, int stride){ |
|
904 |
static void FUNCC(pred8x8_dc_rv40)(uint8_t *p_src, int stride){ |
|
837 | 905 |
int i; |
838 | 906 |
int dc0=0; |
907 |
pixel4 dc0splat; |
|
908 |
pixel *src = (pixel*)p_src; |
|
909 |
stride >>= sizeof(pixel)-1; |
|
839 | 910 |
|
840 | 911 |
for(i=0;i<4; i++){ |
841 | 912 |
dc0+= src[-1+i*stride] + src[i-stride]; |
842 | 913 |
dc0+= src[4+i-stride]; |
843 | 914 |
dc0+= src[-1+(i+4)*stride]; |
844 | 915 |
} |
845 |
dc0= 0x01010101*((dc0 + 8)>>4);
|
|
916 |
dc0splat = PIXEL_SPLAT_X4((dc0 + 8)>>4);
|
|
846 | 917 |
|
847 | 918 |
for(i=0; i<4; i++){ |
848 |
((uint32_t*)(src+i*stride))[0]= dc0;
|
|
849 |
((uint32_t*)(src+i*stride))[1]= dc0;
|
|
919 |
((pixel4*)(src+i*stride))[0]= dc0splat;
|
|
920 |
((pixel4*)(src+i*stride))[1]= dc0splat;
|
|
850 | 921 |
} |
851 | 922 |
for(i=4; i<8; i++){ |
852 |
((uint32_t*)(src+i*stride))[0]= dc0;
|
|
853 |
((uint32_t*)(src+i*stride))[1]= dc0;
|
|
923 |
((pixel4*)(src+i*stride))[0]= dc0splat;
|
|
924 |
((pixel4*)(src+i*stride))[1]= dc0splat;
|
|
854 | 925 |
} |
855 | 926 |
} |
856 | 927 |
|
857 |
static void pred8x8_plane_c(uint8_t *src, int stride){
|
|
928 |
static void FUNCC(pred8x8_plane)(uint8_t *p_src, int p_stride){
|
|
858 | 929 |
int j, k; |
859 | 930 |
int a; |
860 |
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
|
861 |
const uint8_t * const src0 = src+3-stride; |
|
862 |
const uint8_t *src1 = src+4*stride-1; |
|
863 |
const uint8_t *src2 = src1-2*stride; // == src+2*stride-1; |
|
931 |
INIT_CLIP |
|
932 |
pixel *src = (pixel*)p_src; |
|
933 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
934 |
const pixel * const src0 = src +3-stride; |
|
935 |
const pixel * src1 = src +4*stride-1; |
|
936 |
const pixel * src2 = src1-2*stride; // == src+2*stride-1; |
|
864 | 937 |
int H = src0[1] - src0[-1]; |
865 | 938 |
int V = src1[0] - src2[ 0]; |
866 | 939 |
for(k=2; k<=4; ++k) { |
... | ... | |
875 | 948 |
for(j=8; j>0; --j) { |
876 | 949 |
int b = a; |
877 | 950 |
a += V; |
878 |
src[0] = cm[ (b ) >> 5 ];
|
|
879 |
src[1] = cm[ (b+ H) >> 5 ];
|
|
880 |
src[2] = cm[ (b+2*H) >> 5 ];
|
|
881 |
src[3] = cm[ (b+3*H) >> 5 ];
|
|
882 |
src[4] = cm[ (b+4*H) >> 5 ];
|
|
883 |
src[5] = cm[ (b+5*H) >> 5 ];
|
|
884 |
src[6] = cm[ (b+6*H) >> 5 ];
|
|
885 |
src[7] = cm[ (b+7*H) >> 5 ];
|
|
951 |
src[0] = CLIP((b ) >> 5);
|
|
952 |
src[1] = CLIP((b+ H) >> 5);
|
|
953 |
src[2] = CLIP((b+2*H) >> 5);
|
|
954 |
src[3] = CLIP((b+3*H) >> 5);
|
|
955 |
src[4] = CLIP((b+4*H) >> 5);
|
|
956 |
src[5] = CLIP((b+5*H) >> 5);
|
|
957 |
src[6] = CLIP((b+6*H) >> 5);
|
|
958 |
src[7] = CLIP((b+7*H) >> 5);
|
|
886 | 959 |
src += stride; |
887 | 960 |
} |
888 | 961 |
} |
889 | 962 |
|
890 |
static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ |
|
963 |
static void FUNCC(pred8x8_tm_vp8)(uint8_t *p_src, int p_stride){ |
|
964 |
pixel *src = (pixel*)p_src; |
|
965 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
891 | 966 |
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; |
892 |
uint8_t *top = src-stride;
|
|
967 |
pixel *top = src-stride;
|
|
893 | 968 |
int y; |
894 | 969 |
|
895 | 970 |
for (y = 0; y < 8; y++) { |
... | ... | |
939 | 1014 |
#define PREDICT_8x8_DC(v) \ |
940 | 1015 |
int y; \ |
941 | 1016 |
for( y = 0; y < 8; y++ ) { \ |
942 |
((uint32_t*)src)[0] = \
|
|
943 |
((uint32_t*)src)[1] = v; \
|
|
1017 |
((pixel4*)src)[0] = \
|
|
1018 |
((pixel4*)src)[1] = v; \
|
|
944 | 1019 |
src += stride; \ |
945 | 1020 |
} |
946 | 1021 |
|
947 |
static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1022 |
static void FUNCC(pred8x8l_128_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
948 | 1023 |
{ |
949 |
PREDICT_8x8_DC(0x80808080); |
|
1024 |
pixel *src = (pixel*)p_src; |
|
1025 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1026 |
|
|
1027 |
PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1))); |
|
950 | 1028 |
} |
951 |
static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1029 |
static void FUNCC(pred8x8l_left_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
952 | 1030 |
{ |
1031 |
pixel *src = (pixel*)p_src; |
|
1032 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1033 |
|
|
953 | 1034 |
PREDICT_8x8_LOAD_LEFT; |
954 |
const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
|
|
1035 |
const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
|
|
955 | 1036 |
PREDICT_8x8_DC(dc); |
956 | 1037 |
} |
957 |
static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1038 |
static void FUNCC(pred8x8l_top_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
958 | 1039 |
{ |
1040 |
pixel *src = (pixel*)p_src; |
|
1041 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1042 |
|
|
959 | 1043 |
PREDICT_8x8_LOAD_TOP; |
960 |
const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
|
|
1044 |
const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
|
|
961 | 1045 |
PREDICT_8x8_DC(dc); |
962 | 1046 |
} |
963 |
static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1047 |
static void FUNCC(pred8x8l_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
964 | 1048 |
{ |
1049 |
pixel *src = (pixel*)p_src; |
|
1050 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1051 |
|
|
965 | 1052 |
PREDICT_8x8_LOAD_LEFT; |
966 | 1053 |
PREDICT_8x8_LOAD_TOP; |
967 |
const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
|
|
968 |
+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
|
|
1054 |
const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
|
|
1055 |
+t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
|
|
969 | 1056 |
PREDICT_8x8_DC(dc); |
970 | 1057 |
} |
971 |
static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1058 |
static void FUNCC(pred8x8l_horizontal)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
972 | 1059 |
{ |
1060 |
pixel *src = (pixel*)p_src; |
|
1061 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1062 |
|
|
973 | 1063 |
PREDICT_8x8_LOAD_LEFT; |
974 |
#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
|
|
975 |
((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
|
|
1064 |
#define ROW(y) ((pixel4*)(src+y*stride))[0] =\
|
|
1065 |
((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y)
|
|
976 | 1066 |
ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7); |
977 | 1067 |
#undef ROW |
978 | 1068 |
} |
979 |
static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1069 |
static void FUNCC(pred8x8l_vertical)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
980 | 1070 |
{ |
981 | 1071 |
int y; |
1072 |
pixel *src = (pixel*)p_src; |
|
1073 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1074 |
|
|
982 | 1075 |
PREDICT_8x8_LOAD_TOP; |
983 | 1076 |
src[0] = t0; |
984 | 1077 |
src[1] = t1; |
... | ... | |
988 | 1081 |
src[5] = t5; |
989 | 1082 |
src[6] = t6; |
990 | 1083 |
src[7] = t7; |
991 |
for( y = 1; y < 8; y++ ) |
|
992 |
*(uint64_t*)(src+y*stride) = *(uint64_t*)src; |
|
1084 |
for( y = 1; y < 8; y++ ) { |
|
1085 |
((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0]; |
|
1086 |
((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1]; |
|
1087 |
} |
|
993 | 1088 |
} |
994 |
static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1089 |
static void FUNCC(pred8x8l_down_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
995 | 1090 |
{ |
1091 |
pixel *src = (pixel*)p_src; |
|
1092 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
996 | 1093 |
PREDICT_8x8_LOAD_TOP; |
997 | 1094 |
PREDICT_8x8_LOAD_TOPRIGHT; |
998 | 1095 |
SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2; |
... | ... | |
1011 | 1108 |
SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2; |
1012 | 1109 |
SRC(7,7)= (t14 + 3*t15 + 2) >> 2; |
1013 | 1110 |
} |
1014 |
static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1111 |
static void FUNCC(pred8x8l_down_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
1015 | 1112 |
{ |
1113 |
pixel *src = (pixel*)p_src; |
|
1114 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1016 | 1115 |
PREDICT_8x8_LOAD_TOP; |
1017 | 1116 |
PREDICT_8x8_LOAD_LEFT; |
1018 | 1117 |
PREDICT_8x8_LOAD_TOPLEFT; |
... | ... | |
1031 | 1130 |
SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2; |
1032 | 1131 |
SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2; |
1033 | 1132 |
SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2; |
1034 |
|
|
1035 | 1133 |
} |
1036 |
static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1134 |
static void FUNCC(pred8x8l_vertical_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
1037 | 1135 |
{ |
1136 |
pixel *src = (pixel*)p_src; |
|
1137 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1038 | 1138 |
PREDICT_8x8_LOAD_TOP; |
1039 | 1139 |
PREDICT_8x8_LOAD_LEFT; |
1040 | 1140 |
PREDICT_8x8_LOAD_TOPLEFT; |
... | ... | |
1061 | 1161 |
SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2; |
1062 | 1162 |
SRC(7,0)= (t6 + t7 + 1) >> 1; |
1063 | 1163 |
} |
1064 |
static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1164 |
static void FUNCC(pred8x8l_horizontal_down)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
1065 | 1165 |
{ |
1166 |
pixel *src = (pixel*)p_src; |
|
1167 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1066 | 1168 |
PREDICT_8x8_LOAD_TOP; |
1067 | 1169 |
PREDICT_8x8_LOAD_LEFT; |
1068 | 1170 |
PREDICT_8x8_LOAD_TOPLEFT; |
... | ... | |
1089 | 1191 |
SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2; |
1090 | 1192 |
SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2; |
1091 | 1193 |
} |
1092 |
static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1194 |
static void FUNCC(pred8x8l_vertical_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
1093 | 1195 |
{ |
1196 |
pixel *src = (pixel*)p_src; |
|
1197 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1094 | 1198 |
PREDICT_8x8_LOAD_TOP; |
1095 | 1199 |
PREDICT_8x8_LOAD_TOPRIGHT; |
1096 | 1200 |
SRC(0,0)= (t0 + t1 + 1) >> 1; |
... | ... | |
1116 | 1220 |
SRC(7,6)= (t10 + t11 + 1) >> 1; |
1117 | 1221 |
SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2; |
1118 | 1222 |
} |
1119 |
static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
|
|
1223 |
static void FUNCC(pred8x8l_horizontal_up)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
|
|
1120 | 1224 |
{ |
1225 |
pixel *src = (pixel*)p_src; |
|
1226 |
int stride = p_stride>>(sizeof(pixel)-1); |
|
1121 | 1227 |
PREDICT_8x8_LOAD_LEFT; |
1122 | 1228 |
SRC(0,0)= (l0 + l1 + 1) >> 1; |
1123 | 1229 |
SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2; |
... | ... | |
1148 | 1254 |
#undef PL |
1149 | 1255 |
#undef SRC |
1150 | 1256 |
|
1151 |
static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
|
|
1257 |
static void FUNCC(pred4x4_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
|
|
1152 | 1258 |
int i; |
1259 |
pixel *pix = (pixel*)p_pix; |
|
1260 |
const dctcoef *block = (const dctcoef*)p_block; |
|
1261 |
stride >>= sizeof(pixel)-1; |
|
1153 | 1262 |
pix -= stride; |
1154 | 1263 |
for(i=0; i<4; i++){ |
1155 |
uint8_t v = pix[0];
|
|
1264 |
pixel v = pix[0];
|
|
1156 | 1265 |
pix[1*stride]= v += block[0]; |
1157 | 1266 |
pix[2*stride]= v += block[4]; |
1158 | 1267 |
pix[3*stride]= v += block[8]; |
... | ... | |
1162 | 1271 |
} |
1163 | 1272 |
} |
1164 | 1273 |
|
1165 |
static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
|
|
1274 |
static void FUNCC(pred4x4_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
|
|
1166 | 1275 |
int i; |
1276 |
pixel *pix = (pixel*)p_pix; |
|
1277 |
const dctcoef *block = (const dctcoef*)p_block; |
|
1278 |
stride >>= sizeof(pixel)-1; |
|
1167 | 1279 |
for(i=0; i<4; i++){ |
1168 |
uint8_t v = pix[-1];
|
|
1280 |
pixel v = pix[-1];
|
|
1169 | 1281 |
pix[0]= v += block[0]; |
1170 | 1282 |
pix[1]= v += block[1]; |
1171 | 1283 |
pix[2]= v += block[2]; |
... | ... | |
1175 | 1287 |
} |
1176 | 1288 |
} |
1177 | 1289 |
|
1178 |
static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
|
|
1290 |
static void FUNCC(pred8x8l_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
|
|
1179 | 1291 |
int i; |
1292 |
pixel *pix = (pixel*)p_pix; |
|
1293 |
const dctcoef *block = (const dctcoef*)p_block; |
|
1294 |
stride >>= sizeof(pixel)-1; |
|
1180 | 1295 |
pix -= stride; |
1181 | 1296 |
for(i=0; i<8; i++){ |
1182 |
uint8_t v = pix[0];
|
|
1297 |
pixel v = pix[0];
|
|
1183 | 1298 |
pix[1*stride]= v += block[0]; |
1184 | 1299 |
pix[2*stride]= v += block[8]; |
1185 | 1300 |
pix[3*stride]= v += block[16]; |
... | ... | |
1193 | 1308 |
} |
1194 | 1309 |
} |
1195 | 1310 |
|
1196 |
static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
|
|
1311 |
static void FUNCC(pred8x8l_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
|
|
1197 | 1312 |
int i; |
1313 |
pixel *pix = (pixel*)p_pix; |
|
1314 |
const dctcoef *block = (const dctcoef*)p_block; |
|
1315 |
stride >>= sizeof(pixel)-1; |
|
1198 | 1316 |
for(i=0; i<8; i++){ |
1199 |
uint8_t v = pix[-1];
|
|
1317 |
pixel v = pix[-1];
|
|
1200 | 1318 |
pix[0]= v += block[0]; |
1201 | 1319 |
pix[1]= v += block[1]; |
1202 | 1320 |
pix[2]= v += block[2]; |
... | ... | |
1210 | 1328 |
} |
1211 | 1329 |
} |
1212 | 1330 |
|
1213 |
static void pred16x16_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1331 |
static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1214 | 1332 |
int i; |
1215 | 1333 |
for(i=0; i<16; i++) |
1216 |
pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
|
|
1334 |
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
|
|
1217 | 1335 |
} |
1218 | 1336 |
|
1219 |
static void pred16x16_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1337 |
static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1220 | 1338 |
int i; |
1221 | 1339 |
for(i=0; i<16; i++) |
1222 |
pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
|
|
1340 |
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
|
|
1223 | 1341 |
} |
1224 | 1342 |
|
1225 |
static void pred8x8_vertical_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1343 |
static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1226 | 1344 |
int i; |
1227 | 1345 |
for(i=0; i<4; i++) |
1228 |
pred4x4_vertical_add_c(pix + block_offset[i], block + i*16, stride);
|
|
1346 |
FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
|
|
1229 | 1347 |
} |
1230 | 1348 |
|
1231 |
static void pred8x8_horizontal_add_c(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1349 |
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
|
|
1232 | 1350 |
int i; |
1233 | 1351 |
for(i=0; i<4; i++) |
1234 |
pred4x4_horizontal_add_c(pix + block_offset[i], block + i*16, stride);
|
|
1352 |
FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
|
|
1235 | 1353 |
} |
Also available in: Unified diff