ffmpeg / libavcodec / h264pred_internal.h @ 8dbe5856
History | View | Annotate | Download (44.2 KB)
1 |
/*
|
---|---|
2 |
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
|
3 |
* Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*/
|
21 |
|
22 |
/**
|
23 |
* @file
|
24 |
* H.264 / AVC / MPEG4 part10 prediction functions.
|
25 |
* @author Michael Niedermayer <michaelni@gmx.at>
|
26 |
*/
|
27 |
|
28 |
#include "mathops.h" |
29 |
#include "h264_high_depth.h" |
30 |
|
31 |
static void FUNCC(pred4x4_vertical)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
32 |
pixel *src = (pixel*)p_src; |
33 |
int stride = p_stride>>(sizeof(pixel)-1); |
34 |
const pixel4 a= ((pixel4*)(src-stride))[0]; |
35 |
((pixel4*)(src+0*stride))[0]= a; |
36 |
((pixel4*)(src+1*stride))[0]= a; |
37 |
((pixel4*)(src+2*stride))[0]= a; |
38 |
((pixel4*)(src+3*stride))[0]= a; |
39 |
} |
40 |
|
41 |
static void FUNCC(pred4x4_horizontal)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
42 |
pixel *src = (pixel*)p_src; |
43 |
int stride = p_stride>>(sizeof(pixel)-1); |
44 |
((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]); |
45 |
((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]); |
46 |
((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]); |
47 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]); |
48 |
} |
49 |
|
50 |
/**
 * 4x4 DC prediction: every pixel is the rounded mean of the four
 * neighbours above and the four to the left ((sum + 4) >> 3).
 */
static void FUNCC(pred4x4_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;

    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
}
61 |
|
62 |
/**
 * 4x4 DC prediction from the left edge only (top neighbours
 * unavailable): rounded mean of the four left pixels.
 */
static void FUNCC(pred4x4_left_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;

    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
}
72 |
|
73 |
/**
 * 4x4 DC prediction from the top edge only (left neighbours
 * unavailable): rounded mean of the four pixels above.
 */
static void FUNCC(pred4x4_top_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;

    ((pixel4*)(src+0*stride))[0]=
    ((pixel4*)(src+1*stride))[0]=
    ((pixel4*)(src+2*stride))[0]=
    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
}
83 |
|
84 |
static void FUNCC(pred4x4_128_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
85 |
pixel *src = (pixel*)p_src; |
86 |
int stride = p_stride>>(sizeof(pixel)-1); |
87 |
((pixel4*)(src+0*stride))[0]= |
88 |
((pixel4*)(src+1*stride))[0]= |
89 |
((pixel4*)(src+2*stride))[0]= |
90 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)); |
91 |
} |
92 |
|
93 |
static void FUNCC(pred4x4_127_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
94 |
pixel *src = (pixel*)p_src; |
95 |
int stride = p_stride>>(sizeof(pixel)-1); |
96 |
((pixel4*)(src+0*stride))[0]= |
97 |
((pixel4*)(src+1*stride))[0]= |
98 |
((pixel4*)(src+2*stride))[0]= |
99 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1); |
100 |
} |
101 |
|
102 |
static void FUNCC(pred4x4_129_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){ |
103 |
pixel *src = (pixel*)p_src; |
104 |
int stride = p_stride>>(sizeof(pixel)-1); |
105 |
((pixel4*)(src+0*stride))[0]= |
106 |
((pixel4*)(src+1*stride))[0]= |
107 |
((pixel4*)(src+2*stride))[0]= |
108 |
((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1); |
109 |
} |
110 |
|
111 |
|
112 |
/* Declare the four top-right neighbour samples t4..t7, read from the
 * separate 'topright' pointer. av_unused silences warnings in callers
 * that do not read every sample. */
#define LOAD_TOP_RIGHT_EDGE\
    const int av_unused t4= topright[0];\
    const int av_unused t5= topright[1];\
    const int av_unused t6= topright[2];\
    const int av_unused t7= topright[3];\

/* Declare the four below-left neighbour samples l4..l7 (rows 4..7 of
 * the column left of the block). */
#define LOAD_DOWN_LEFT_EDGE\
    const int av_unused l4= src[-1+4*stride];\
    const int av_unused l5= src[-1+5*stride];\
    const int av_unused l6= src[-1+6*stride];\
    const int av_unused l7= src[-1+7*stride];\

/* Declare the four left neighbour samples l0..l3. */
#define LOAD_LEFT_EDGE\
    const int av_unused l0= src[-1+0*stride];\
    const int av_unused l1= src[-1+1*stride];\
    const int av_unused l2= src[-1+2*stride];\
    const int av_unused l3= src[-1+3*stride];\

/* Declare the four top neighbour samples t0..t3. */
#define LOAD_TOP_EDGE\
    const int av_unused t0= src[ 0-1*stride];\
    const int av_unused t1= src[ 1-1*stride];\
    const int av_unused t2= src[ 2-1*stride];\
    const int av_unused t3= src[ 3-1*stride];\

|
136 |
/**
 * VP8 4x4 vertical prediction: each column is the 1-2-1 smoothed
 * value of the top edge (using top-left and top-right as outer taps),
 * replicated down all four rows.
 */
static void FUNCC(pred4x4_vertical_vp8)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];  /* top-left corner sample */
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE
    pixel4 v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
                        (t0 + 2*t1 + t2 + 2) >> 2,
                        (t1 + 2*t2 + t3 + 2) >> 2,
                        (t2 + 2*t3 + t4 + 2) >> 2);

    AV_WN4PA(src+0*stride, v);
    AV_WN4PA(src+1*stride, v);
    AV_WN4PA(src+2*stride, v);
    AV_WN4PA(src+3*stride, v);
}
153 |
|
154 |
/**
 * VP8 4x4 horizontal prediction: each row is the 1-2-1 smoothed value
 * of the left-edge column. The last row reuses l3 for the missing
 * below-left sample (bottom-edge rule).
 */
static void FUNCC(pred4x4_horizontal_vp8)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];  /* top-left corner sample */
    LOAD_LEFT_EDGE

    AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4((lt + 2*l0 + l1 + 2) >> 2));
    AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4((l0 + 2*l1 + l2 + 2) >> 2));
    AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4((l1 + 2*l2 + l3 + 2) >> 2));
    AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4((l2 + 2*l3 + l3 + 2) >> 2));
}
165 |
|
166 |
/**
 * 4x4 diagonal down-right prediction: 3-tap (1,2,1) filtered values
 * propagated along the down-right diagonals; each filtered value fills
 * one anti-diagonal of the block.
 */
static void FUNCC(pred4x4_down_right)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];  /* top-left corner sample */
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
    src[0+2*stride]=
    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
    src[0+1*stride]=
    src[1+2*stride]=
    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
    src[0+0*stride]=
    src[1+1*stride]=
    src[2+2*stride]=
    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;  /* main diagonal */
    src[1+0*stride]=
    src[2+1*stride]=
    src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+0*stride]=
    src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
}
190 |
|
191 |
/**
 * 4x4 diagonal down-left prediction: (1,2,1) filtered top and
 * top-right samples propagated along the down-left diagonals; the
 * bottom-right pixel uses t7 twice as there is no sample beyond it.
 */
static void FUNCC(pred4x4_down_left)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
    src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
}
216 |
|
217 |
/**
 * SVQ3 variant of down-left prediction: simple (left+top)/2 averages
 * per diagonal; everything below the second diagonal is flattened to
 * (l3 + t3) >> 1. t0/l0 are loaded but unused (kept to reuse the
 * LOAD_* macros without warnings).
 */
static void FUNCC(pred4x4_down_left_svq3)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE
    const av_unused int unu0= t0;
    const av_unused int unu1= l0;

    src[0+0*stride]=(l1 + t1)>>1;
    src[1+0*stride]=
    src[0+1*stride]=(l2 + t2)>>1;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=
    src[3+2*stride]=
    src[2+3*stride]=
    src[3+3*stride]=(l3 + t3)>>1;
}
242 |
|
243 |
/**
 * RV40 down-left prediction: averages the H.264-style down-left
 * filter applied to the top/top-right edge with the same filter
 * applied to the left/below-left edge.
 */
static void FUNCC(pred4x4_down_left_rv40)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE
    LOAD_LEFT_EDGE
    LOAD_DOWN_LEFT_EDGE

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3;
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3;
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3;
    src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
}
269 |
|
270 |
/**
 * RV40 down-left prediction when the below-left samples are
 * unavailable: identical to pred4x4_down_left_rv40 but every l4..l7
 * reference is replaced by l3 (edge replication).
 */
static void FUNCC(pred4x4_down_left_rv40_nodown)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
    src[1+0*stride]=
    src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
    src[2+0*stride]=
    src[1+1*stride]=
    src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3;   /* l4 -> l3 */
    src[3+0*stride]=
    src[2+1*stride]=
    src[1+2*stride]=
    src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3;        /* l3..l5 -> l3 */
    src[3+1*stride]=
    src[2+2*stride]=
    src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3;
    src[3+2*stride]=
    src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3;
    src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
}
295 |
|
296 |
/**
 * 4x4 vertical-right prediction: even rows take 2-tap averages of
 * the top edge, odd rows take 3-tap (1,2,1) filtered values shifted
 * half a pixel right; the left column of the lower rows comes from
 * filtered left-edge samples.
 */
static void FUNCC(pred4x4_vertical_right)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];  /* top-left corner sample */
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[1+2*stride]=(lt + t0 + 1)>>1;
    src[1+0*stride]=
    src[2+2*stride]=(t0 + t1 + 1)>>1;
    src[2+0*stride]=
    src[3+2*stride]=(t1 + t2 + 1)>>1;
    src[3+0*stride]=(t2 + t3 + 1)>>1;
    src[0+1*stride]=
    src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[1+1*stride]=
    src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[2+1*stride]=
    src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
}
320 |
|
321 |
/**
 * 4x4 vertical-left prediction: even rows take 2-tap averages of the
 * top/top-right edge, odd rows take 3-tap (1,2,1) filtered values,
 * each row pair shifted half a pixel left relative to the previous.
 */
static void FUNCC(pred4x4_vertical_left)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
345 |
|
346 |
/**
 * Shared body for the RV40 vertical-left predictors. Same as H.264
 * vertical-left except the first column of rows 0 and 1 additionally
 * mixes in filtered left-edge samples (l1..l4 passed by the caller,
 * which substitutes l3 when the below-left sample l4 is unavailable).
 */
static void FUNCC(pred4x4_vertical_left_rv40_internal)(uint8_t *p_src, const uint8_t *p_topright, int p_stride,
                                                       const int l0, const int l1, const int l2, const int l3, const int l4){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4+ 1)>>1;
    src[3+2*stride]=(t4 + t5+ 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
}
371 |
|
372 |
/**
 * RV40 vertical-left prediction with the below-left sample available:
 * forwards l0..l4 to the shared internal implementation.
 */
static void FUNCC(pred4x4_vertical_left_rv40)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_LEFT_EDGE
    LOAD_DOWN_LEFT_EDGE

    FUNCC(pred4x4_vertical_left_rv40_internal)(p_src, topright, p_stride, l0, l1, l2, l3, l4);
}
380 |
|
381 |
/**
 * RV40 vertical-left prediction without below-left samples: passes
 * l3 in place of the missing l4 (edge replication).
 */
static void FUNCC(pred4x4_vertical_left_rv40_nodown)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_LEFT_EDGE

    FUNCC(pred4x4_vertical_left_rv40_internal)(p_src, topright, p_stride, l0, l1, l2, l3, l3);
}
388 |
|
389 |
/**
 * VP8 vertical-left prediction: like H.264 vertical-left but the
 * bottom-right corner pixels extend further into the top-right edge
 * (using t6 and t7) instead of repeating earlier values.
 */
static void FUNCC(pred4x4_vertical_left_vp8)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t0 + t1 + 1)>>1;
    src[1+0*stride]=
    src[0+2*stride]=(t1 + t2 + 1)>>1;
    src[2+0*stride]=
    src[1+2*stride]=(t2 + t3 + 1)>>1;
    src[3+0*stride]=
    src[2+2*stride]=(t3 + t4 + 1)>>1;
    src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[1+1*stride]=
    src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
    src[2+1*stride]=
    src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
    src[3+1*stride]=
    src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
    src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2;
    src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
}
413 |
|
414 |
/**
 * 4x4 horizontal-up prediction: interpolates upward along the left
 * edge with alternating 2-tap and 3-tap filters; everything past the
 * last left sample is flattened to l3.
 */
static void FUNCC(pred4x4_horizontal_up)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_LEFT_EDGE

    src[0+0*stride]=(l0 + l1 + 1)>>1;
    src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[2+0*stride]=
    src[0+1*stride]=(l1 + l2 + 1)>>1;
    src[3+0*stride]=
    src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
    src[2+1*stride]=
    src[0+2*stride]=(l2 + l3 + 1)>>1;
    src[3+1*stride]=
    src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;  /* l3 doubled: below-left is unavailable */
    src[3+2*stride]=
    src[1+3*stride]=
    src[0+3*stride]=
    src[2+2*stride]=
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
436 |
|
437 |
/**
 * RV40 horizontal-up prediction: blends filtered top/top-right
 * samples with filtered left/below-left samples; the bottom rows use
 * the below-left edge (l4..l6) directly.
 */
static void FUNCC(pred4x4_horizontal_up_rv40)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_LEFT_EDGE
    LOAD_DOWN_LEFT_EDGE
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
    src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
    src[2+0*stride]=
    src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
    src[3+0*stride]=
    src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
    src[2+1*stride]=
    src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
    src[3+1*stride]=
    src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
    src[3+2*stride]=
    src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2;
    src[0+3*stride]=
    src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2;
    src[2+3*stride]=(l4 + l5 + 1)>>1;
    src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
}
463 |
|
464 |
/**
 * RV40 horizontal-up prediction without below-left samples: same as
 * pred4x4_horizontal_up_rv40 for the upper part, with the bottom rows
 * replicating l3 where l4..l6 would have been used.
 */
static void FUNCC(pred4x4_horizontal_up_rv40_nodown)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
    pixel *src = (pixel*)p_src;
    const pixel *topright = (const pixel*)p_topright;
    int stride = p_stride>>(sizeof(pixel)-1);
    LOAD_LEFT_EDGE
    LOAD_TOP_EDGE
    LOAD_TOP_RIGHT_EDGE

    src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
    src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
    src[2+0*stride]=
    src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
    src[3+0*stride]=
    src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
    src[2+1*stride]=
    src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
    src[3+1*stride]=
    src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
    src[3+2*stride]=
    src[1+3*stride]=l3;
    src[0+3*stride]=
    src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2;
    src[2+3*stride]=
    src[3+3*stride]=l3;
}
489 |
|
490 |
/**
 * 4x4 horizontal-down prediction: even columns take 2-tap averages
 * down the left edge, odd columns take 3-tap (1,2,1) filtered values;
 * the top row extends into the top edge.
 */
static void FUNCC(pred4x4_horizontal_down)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const int lt= src[-1-1*stride];  /* top-left corner sample */
    LOAD_TOP_EDGE
    LOAD_LEFT_EDGE

    src[0+0*stride]=
    src[2+1*stride]=(lt + l0 + 1)>>1;
    src[1+0*stride]=
    src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
    src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
    src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
    src[0+1*stride]=
    src[2+2*stride]=(l0 + l1 + 1)>>1;
    src[1+1*stride]=
    src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
    src[0+2*stride]=
    src[2+3*stride]=(l1 + l2+ 1)>>1;
    src[1+2*stride]=
    src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
    src[0+3*stride]=(l2 + l3 + 1)>>1;
    src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
}
514 |
|
515 |
/**
 * VP8 TrueMotion 4x4 prediction: pred[x,y] = clip(left[y] + top[x] - topleft),
 * implemented via an offset into the clipping table ff_cropTbl.
 * NOTE(review): the crop table is indexed with byte-sized offsets, which
 * looks valid only for 8-bit pixels — presumably this template is wired up
 * for BIT_DEPTH==8 only; confirm against the function-pointer setup.
 */
static void FUNCC(pred4x4_tm_vp8)(uint8_t *p_src, const uint8_t *topright, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    /* cm[v] == clip(v - topleft); adding left[y] below completes the formula */
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
    pixel *top = src-stride;
    int y;

    for (y = 0; y < 4; y++) {
        uint8_t *cm_in = cm + src[-1];
        src[0] = cm_in[top[0]];
        src[1] = cm_in[top[1]];
        src[2] = cm_in[top[2]];
        src[3] = cm_in[top[3]];
        src += stride;
    }
}
531 |
|
532 |
/**
 * 16x16 vertical prediction: copy the 16-pixel row above the block
 * (read as four pixel4 words) into all 16 rows.
 */
static void FUNCC(pred16x16_vertical)(uint8_t *p_src, int p_stride){
    int i;
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const pixel4 a = ((pixel4*)(src-stride))[0];
    const pixel4 b = ((pixel4*)(src-stride))[1];
    const pixel4 c = ((pixel4*)(src-stride))[2];
    const pixel4 d = ((pixel4*)(src-stride))[3];

    for(i=0; i<16; i++){
        ((pixel4*)(src+i*stride))[0] = a;
        ((pixel4*)(src+i*stride))[1] = b;
        ((pixel4*)(src+i*stride))[2] = c;
        ((pixel4*)(src+i*stride))[3] = d;
    }
}
548 |
|
549 |
/**
 * 16x16 horizontal prediction: fill each row with the reconstructed
 * pixel immediately to its left.
 */
static void FUNCC(pred16x16_horizontal)(uint8_t *p_src, int stride){
    int i;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    for(i=0; i<16; i++){
        ((pixel4*)(src+i*stride))[0] =
        ((pixel4*)(src+i*stride))[1] =
        ((pixel4*)(src+i*stride))[2] =
        ((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
    }
}
561 |
|
562 |
/* Fill all 16 rows of a 16x16 block with the splatted value v.
 * Expects 'i', 'src' and 'stride' to exist in the expanding scope;
 * leaves src advanced past the block. */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4P(src+ 0, v);\
        AV_WN4P(src+ 4, v);\
        AV_WN4P(src+ 8, v);\
        AV_WN4P(src+12, v);\
        src += stride;\
    }
570 |
|
571 |
/**
 * 16x16 DC prediction: rounded mean of the 16 left and 16 top
 * neighbours ((sum + 16) >> 5), splatted over the whole block.
 */
static void FUNCC(pred16x16_dc)(uint8_t *p_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)p_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    /* sum the left column */
    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    /* sum the top row */
    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
    PREDICT_16x16_DC(dcsplat);
}
588 |
|
589 |
/**
 * 16x16 DC prediction from the left edge only (top unavailable):
 * rounded mean of the 16 left neighbours ((sum + 8) >> 4).
 */
static void FUNCC(pred16x16_left_dc)(uint8_t *p_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)p_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
602 |
|
603 |
/**
 * 16x16 DC prediction from the top edge only (left unavailable):
 * rounded mean of the 16 pixels above ((sum + 8) >> 4).
 */
static void FUNCC(pred16x16_top_dc)(uint8_t *p_src, int stride){
    int i, dc=0;
    pixel *src = (pixel*)p_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
616 |
|
617 |
/* Generate pred16x16_{127,128,129}_dc: constant fills used when no
 * neighbours are available (mid-grey and its +/-1 variants). */
#define PRED16x16_X(n, v) \
static void FUNCC(pred16x16_##n##_dc)(uint8_t *p_src, int stride){\
    int i;\
    pixel *src = (pixel*)p_src;\
    stride >>= sizeof(pixel)-1;\
    PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
}

PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1);
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0);
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1);
628 |
|
629 |
/**
 * 16x16 plane prediction shared by H.264, SVQ3 and RV40.
 * Estimates horizontal (H) and vertical (V) gradients from weighted
 * differences of mirrored edge samples, then fills the block with a
 * clipped linear ramp. svq3/rv40 select codec-specific gradient
 * scaling; svq3 additionally swaps H and V.
 *
 * @param svq3 use SVQ3 gradient scaling (and H/V swap) when nonzero
 * @param rv40 use RV40 gradient scaling when nonzero
 */
static inline void FUNCC(pred16x16_plane_compat)(uint8_t *p_src, int p_stride, const int svq3, const int rv40){
    int i, j, k;
    int a;
    INIT_CLIP
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const pixel * const src0 = src +7-stride;          /* middle of the top edge */
    const pixel *       src1 = src +8*stride-1;        /* middle of the left edge */
    const pixel *       src2 = src1-2*stride;          /* == src+6*stride-1 */
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    /* accumulate weighted differences of samples mirrored about the centre */
    for(k=2; k<=8; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    if(svq3){
        H = ( 5*(H/4) ) / 16;
        V = ( 5*(V/4) ) / 16;

        /* required for 100% accuracy */
        i = H; H = V; V = i;
    }else if(rv40){
        H = ( H + (H>>2) ) >> 4;
        V = ( V + (V>>2) ) >> 4;
    }else{
        H = ( 5*H+32 ) >> 6;
        V = ( 5*V+32 ) >> 6;
    }

    /* plane base: src1[0] is the bottom-left, src2[16] the top-right corner */
    a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
    for(j=16; j>0; --j) {
        int b = a;
        a += V;
        for(i=-16; i<0; i+=4) {
            src[16+i] = CLIP((b    ) >> 5);
            src[17+i] = CLIP((b+  H) >> 5);
            src[18+i] = CLIP((b+2*H) >> 5);
            src[19+i] = CLIP((b+3*H) >> 5);
            b += 4*H;
        }
        src += stride;
    }
}
673 |
|
674 |
/** H.264 16x16 plane prediction (standard gradient scaling). */
static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
}
677 |
|
678 |
/** SVQ3 16x16 plane prediction (SVQ3 gradient scaling, H/V swapped). */
static void FUNCC(pred16x16_plane_svq3)(uint8_t *src, int stride){
    FUNCC(pred16x16_plane_compat)(src, stride, 1, 0);
}
681 |
|
682 |
/** RV40 16x16 plane prediction (RV40 gradient scaling). */
static void FUNCC(pred16x16_plane_rv40)(uint8_t *src, int stride){
    FUNCC(pred16x16_plane_compat)(src, stride, 0, 1);
}
685 |
|
686 |
/**
 * VP8 TrueMotion 16x16 prediction: pred[x,y] = clip(left[y] + top[x] - topleft)
 * via an offset into ff_cropTbl.
 * NOTE(review): unlike the other FUNCC templates this operates on raw
 * uint8_t pointers with no pixel cast — presumably only used for 8-bit;
 * confirm against the function-pointer setup.
 */
static void FUNCC(pred16x16_tm_vp8)(uint8_t *src, int stride){
    /* cm[v] == clip(v - topleft) */
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
    uint8_t *top = src-stride;
    int y;

    for (y = 0; y < 16; y++) {
        uint8_t *cm_in = cm + src[-1];
        src[0] = cm_in[top[0]];
        src[1] = cm_in[top[1]];
        src[2] = cm_in[top[2]];
        src[3] = cm_in[top[3]];
        src[4] = cm_in[top[4]];
        src[5] = cm_in[top[5]];
        src[6] = cm_in[top[6]];
        src[7] = cm_in[top[7]];
        src[8] = cm_in[top[8]];
        src[9] = cm_in[top[9]];
        src[10] = cm_in[top[10]];
        src[11] = cm_in[top[11]];
        src[12] = cm_in[top[12]];
        src[13] = cm_in[top[13]];
        src[14] = cm_in[top[14]];
        src[15] = cm_in[top[15]];
        src += stride;
    }
}
712 |
|
713 |
/**
 * 8x8 vertical prediction: copy the 8-pixel row above the block
 * (read as two pixel4 words) into all 8 rows.
 */
static void FUNCC(pred8x8_vertical)(uint8_t *p_src, int p_stride){
    int i;
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const pixel4 a= ((pixel4*)(src-stride))[0];
    const pixel4 b= ((pixel4*)(src-stride))[1];

    for(i=0; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= a;
        ((pixel4*)(src+i*stride))[1]= b;
    }
}
725 |
|
726 |
/**
 * 8x8 horizontal prediction: fill each row with the reconstructed
 * pixel immediately to its left.
 */
static void FUNCC(pred8x8_horizontal)(uint8_t *p_src, int stride){
    int i;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    for(i=0; i<8; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
    }
}
736 |
|
737 |
/* Generate pred8x8_{127,128,129}_dc: constant fills used when no
 * neighbours are available (mid-grey and its +/-1 variants). */
#define PRED8x8_X(n, v)\
static void FUNCC(pred8x8_##n##_dc)(uint8_t *p_src, int stride){\
    int i;\
    pixel *src = (pixel*)p_src;\
    stride >>= sizeof(pixel)-1;\
    for(i=0; i<8; i++){\
        ((pixel4*)(src+i*stride))[0]=\
        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\
    }\
}

PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
751 |
|
752 |
/**
 * 8x8 chroma DC prediction from the left edge only: the top 8x8 half
 * uses the mean of left samples 0..3 (dc0), the bottom half the mean
 * of left samples 4..7 (dc2).
 */
static void FUNCC(pred8x8_left_dc)(uint8_t *p_src, int stride){
    int i;
    int dc0, dc2;
    pixel4 dc0splat, dc2splat;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= dc0splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= dc2splat;
    }
}
776 |
|
777 |
/**
 * RV40 8x8 DC prediction from the left edge only: a single mean of
 * all 8 left samples fills the whole block (no 4x4 quadrant split).
 */
static void FUNCC(pred8x8_left_dc_rv40)(uint8_t *p_src, int stride){
    int i;
    int dc0;
    pixel4 dc0splat;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    dc0=0;
    for(i=0;i<8; i++)
        dc0+= src[-1+i*stride];
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);

    for(i=0; i<8; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= dc0splat;
    }
}
794 |
|
795 |
/**
 * 8x8 chroma DC prediction from the top edge only: the left 4 columns
 * use the mean of top samples 0..3 (dc0), the right 4 columns the
 * mean of top samples 4..7 (dc1); applied to all 8 rows.
 */
static void FUNCC(pred8x8_top_dc)(uint8_t *p_src, int stride){
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc1splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc1splat;
    }
}
819 |
|
820 |
/**
 * RV40 8x8 DC prediction from the top edge only: a single mean of
 * all 8 top samples fills the whole block (no 4x4 quadrant split).
 */
static void FUNCC(pred8x8_top_dc_rv40)(uint8_t *p_src, int stride){
    int i;
    int dc0;
    pixel4 dc0splat;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    dc0=0;
    for(i=0;i<8; i++)
        dc0+= src[i-stride];
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);

    for(i=0; i<8; i++){
        ((pixel4*)(src+i*stride))[0]=
        ((pixel4*)(src+i*stride))[1]= dc0splat;
    }
}
837 |
|
838 |
|
839 |
/**
 * 8x8 chroma DC prediction per H.264: four 4x4 quadrants get separate
 * DC values — top-left from 8 adjacent neighbours (dc0), top-right
 * from the top edge (dc1), bottom-left from the left edge (dc2),
 * bottom-right from dc1+dc2 combined.
 */
static void FUNCC(pred8x8_dc)(uint8_t *p_src, int stride){
    int i;
    int dc0, dc1, dc2;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride]; /* left 0..3 + top 0..3 */
        dc1+= src[4+i-stride];                  /* top 4..7 */
        dc2+= src[-1+(i+4)*stride];             /* left 4..7 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc1splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= dc2splat;
        ((pixel4*)(src+i*stride))[1]= dc3splat;
    }
}
866 |
|
867 |
/* The following 4 functions intentionally compose the plain 8x8/4x4
 * predictors and should not be optimized: they emulate partially
 * available edges by overwriting one quadrant after the full fill. */
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
    FUNCC(pred8x8_top_dc)(src, stride);
    FUNCC(pred4x4_dc)(src, NULL, stride);
}
872 |
|
873 |
/* Full 8x8 DC fill, then redo the top-left 4x4 quadrant from the top
 * edge only. */
static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
    FUNCC(pred8x8_dc)(src, stride);
    FUNCC(pred4x4_top_dc)(src, NULL, stride);
}
877 |
|
878 |
/* Left-edge DC fill, then overwrite the bottom half with the
 * no-neighbour constant (mid-grey). */
static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
}
|
884 |
/* Left-edge DC fill, then overwrite the top half with the
 * no-neighbour constant (mid-grey). */
static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
    FUNCC(pred8x8_left_dc)(src, stride);
    FUNCC(pred4x4_128_dc)(src                  , NULL, stride);
    FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
}
889 |
|
890 |
/**
 * RV40 8x8 DC prediction: a single mean of all 16 neighbours (8 top +
 * 8 left, (sum + 8) >> 4) fills the whole block — no quadrant split.
 */
static void FUNCC(pred8x8_dc_rv40)(uint8_t *p_src, int stride){
    int i;
    int dc0=0;
    pixel4 dc0splat;
    pixel *src = (pixel*)p_src;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride]; /* left 0..3 + top 0..3 */
        dc0+= src[4+i-stride];                  /* top 4..7 */
        dc0+= src[-1+(i+4)*stride];             /* left 4..7 */
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 8)>>4);

    for(i=0; i<4; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc0splat;
    }
    for(i=4; i<8; i++){
        ((pixel4*)(src+i*stride))[0]= dc0splat;
        ((pixel4*)(src+i*stride))[1]= dc0splat;
    }
}
913 |
|
914 |
/* 8x8 plane (gradient) prediction: fit horizontal (H) and vertical (V)
 * gradients from the border samples, then fill the block with the clipped
 * linear ramp a + x*H + y*V, evaluated in fixed point with a >>5 at the
 * store. */
static void FUNCC(pred8x8_plane)(uint8_t *p_src, int p_stride){
    int x, y, k;
    int row_base;
    INIT_CLIP
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    const pixel * const top = src +3-stride;      /* centred on the top edge */
    const pixel * below = src +4*stride-1;        /* walks down the left edge */
    const pixel * above = below-2*stride;         /* == src+2*stride-1, walks up */
    int H = top[1]  - top[-1];
    int V = below[0] - above[0];
    /* weighted differences of symmetric border pairs */
    for (k = 2; k <= 4; ++k) {
        below += stride; above -= stride;
        H += k*(top[k] - top[-k]);
        V += k*(below[0] - above[0]);
    }
    H = ( 17*H+16 ) >> 5;
    V = ( 17*V+16 ) >> 5;

    /* below[0] is the bottom-left neighbour, above[8] the top-right one */
    row_base = 16*(below[0] + above[8]+1) - 3*(V+H);
    for (y = 8; y > 0; --y) {
        int acc = row_base;
        row_base += V;
        for (x = 0; x < 8; x++) {
            src[x] = CLIP(acc >> 5);
            acc += H;            /* step the ramp one column to the right */
        }
        src += stride;
    }
}
948 |
|
949 |
/* VP8 TrueMotion 8x8 prediction: each output pixel is
 * clip(top[x] + left[y] - topleft), realized by pre-offsetting the crop
 * table by -topleft once and by +left[y] per row. */
static void FUNCC(pred8x8_tm_vp8)(uint8_t *p_src, int p_stride){
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1);
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride]; /* bias by -topleft */
    pixel *top = src-stride;
    int x, y;

    for (y = 0; y < 8; y++) {
        uint8_t *row = cm + src[-1];  /* further bias by this row's left sample */
        for (x = 0; x < 8; x++)
            src[x] = row[top[x]];
        src += stride;
    }
}
969 |
|
970 |
/* Address a sample relative to the 8x8 block origin. */
#define SRC(x,y) src[(x)+(y)*stride]

/* 3-tap [1 2 1] smoothing of one left-edge sample into local const l<y>. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Load and filter the 8 left-edge neighbours into l0..l7; the first tap of
 * l0 substitutes SRC(-1,0) when the top-left sample is unavailable, and l7
 * has no sample below it so it is filtered as (l6 + 3*l7). */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* 3-tap [1 2 1] smoothing of one top-edge sample into local const t<x>. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load and filter the 8 top-edge neighbours into t0..t7, with availability
 * substitution at both ends (top-left for t0, top-right for t7). */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* Same 3-tap filter as PT, but assigning into pre-declared t<x>. */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Load the 8 top-right neighbours into t8..t15; when unavailable they are
 * all replaced by the last top-edge sample, per the H.264 padding rule. */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* Filtered top-left corner sample. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fill all 8 rows of the block with the splatted DC value v. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        ((pixel4*)src)[0] = \
        ((pixel4*)src)[1] = v; \
        src += stride; \
    }
1007 |
|
1008 |
/* 8x8 luma DC prediction with no neighbours available: fill the block with
 * the bit-depth midpoint (128 at 8-bit). Availability flags are unused. */
static void FUNCC(pred8x8l_128_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
}
1015 |
/* 8x8 luma DC prediction from the left edge only: average of the 8 filtered
 * left-edge samples (rounded, +4>>3). */
static void FUNCC(pred8x8l_left_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_LEFT;  /* declares l0..l7 */
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
    PREDICT_8x8_DC(dc);
}
1024 |
/* 8x8 luma DC prediction from the top edge only: average of the 8 filtered
 * top-edge samples (rounded, +4>>3). */
static void FUNCC(pred8x8l_top_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_TOP;  /* declares t0..t7 */
    const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
    PREDICT_8x8_DC(dc);
}
1033 |
/* 8x8 luma DC prediction from both edges: average of all 16 filtered
 * neighbour samples (rounded, +8>>4). */
static void FUNCC(pred8x8l_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_LEFT;  /* declares l0..l7 */
    PREDICT_8x8_LOAD_TOP;   /* declares t0..t7 */
    const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
                                      +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
    PREDICT_8x8_DC(dc);
}
1044 |
/* 8x8 luma horizontal prediction: row y is filled with the filtered
 * left-edge sample l<y>, written as two pixel4 stores per row. */
static void FUNCC(pred8x8l_horizontal)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_LEFT;  /* declares l0..l7 */
#define ROW(y) ((pixel4*)(src+y*stride))[0] =\
               ((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y)
    ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
#undef ROW
}
1055 |
/* 8x8 luma vertical prediction: write the filtered top edge t0..t7 into the
 * first row, then replicate that row downwards with pixel4 copies. */
static void FUNCC(pred8x8l_vertical)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    int y;
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */

    PREDICT_8x8_LOAD_TOP;  /* declares t0..t7 */
    src[0] = t0;
    src[1] = t1;
    src[2] = t2;
    src[3] = t3;
    src[4] = t4;
    src[5] = t5;
    src[6] = t6;
    src[7] = t7;
    /* copy row 0 into rows 1..7 */
    for( y = 1; y < 8; y++ ) {
        ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0];
        ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1];
    }
}
1075 |
/* 8x8 luma diagonal down-left prediction: each anti-diagonal (constant x+y)
 * takes one [1 2 1]-filtered value from the top / top-right edge t0..t15.
 * The assignment order is the H.264 reference formula; do not reorder. */
static void FUNCC(pred8x8l_down_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;       /* declares t0..t7 */
    PREDICT_8x8_LOAD_TOPRIGHT;  /* declares t8..t15 */
    SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
    SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
    SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
    SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
    SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
}
1097 |
/* 8x8 luma diagonal down-right prediction: diagonals (constant x-y) take
 * filtered values from the left edge, the top-left corner, and the top
 * edge. Reference formula order; do not reorder. */
static void FUNCC(pred8x8l_down_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;      /* declares t0..t7 */
    PREDICT_8x8_LOAD_LEFT;     /* declares l0..l7 */
    PREDICT_8x8_LOAD_TOPLEFT;  /* declares lt */
    SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
    SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
}
1120 |
/* 8x8 luma vertical-right prediction: alternating 2-tap ((a+b+1)>>1) and
 * 3-tap ((a+2b+c+2)>>2) interpolations along half-pel diagonals, drawn
 * from the left edge, top-left corner and top edge. Reference formula
 * order; do not reorder. */
static void FUNCC(pred8x8l_vertical_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;      /* declares t0..t7 */
    PREDICT_8x8_LOAD_LEFT;     /* declares l0..l7 */
    PREDICT_8x8_LOAD_TOPLEFT;  /* declares lt */
    SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
    SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
    SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
    SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
    SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
    SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
    SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
    SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
    SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
    SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
    SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
    SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
    SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
    SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
    SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(7,0)= (t6 + t7 + 1) >> 1;
}
1150 |
/* 8x8 luma horizontal-down prediction: alternating 2-tap and 3-tap
 * interpolations from the left edge, top-left corner and top edge,
 * mirrored relative to vertical-right. Reference formula order; do not
 * reorder. */
static void FUNCC(pred8x8l_horizontal_down)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;      /* declares t0..t7 */
    PREDICT_8x8_LOAD_LEFT;     /* declares l0..l7 */
    PREDICT_8x8_LOAD_TOPLEFT;  /* declares lt */
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
1180 |
/* 8x8 luma vertical-left prediction: alternating 2-tap and 3-tap
 * interpolations along the top / top-right edge t0..t12. Reference formula
 * order; do not reorder. */
static void FUNCC(pred8x8l_vertical_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_TOP;       /* declares t0..t7 */
    PREDICT_8x8_LOAD_TOPRIGHT;  /* declares t8..t15 */
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
1209 |
/* 8x8 luma horizontal-up prediction: interpolations down the left edge
 * l0..l7 only; past the last left sample the remaining lower-right area is
 * padded with l7. Reference formula order; do not reorder. */
static void FUNCC(pred8x8l_horizontal_up)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
{
    pixel *src = (pixel*)p_src;
    int stride = p_stride>>(sizeof(pixel)-1); /* byte stride -> pixel stride */
    PREDICT_8x8_LOAD_LEFT;  /* declares l0..l7 */
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    /* below/right of the last usable diagonal: replicate l7 */
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
}
1233 |
/* Tear down the 8x8 prediction helper macros now that all pred8x8l_*
 * functions have been instantiated. */
#undef PREDICT_8x8_LOAD_LEFT
#undef PREDICT_8x8_LOAD_TOP
#undef PREDICT_8x8_LOAD_TOPLEFT
#undef PREDICT_8x8_LOAD_TOPRIGHT
#undef PREDICT_8x8_DC
#undef PTR
#undef PT
#undef PL
#undef SRC
|
1242 |
|
1243 |
/* Add a 4x4 residual to a block column-wise: within each column, residual
 * coefficients are accumulated top-to-bottom starting from the pixel just
 * above the block (column DC running sum). */
static void FUNCC(pred4x4_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
    int col, row;
    pixel *pix = (pixel*)p_pix;
    const dctcoef *block = (const dctcoef*)p_block;
    stride >>= sizeof(pixel)-1;  /* byte stride -> pixel stride */
    pix -= stride;               /* start from the row above the block */
    for (col = 0; col < 4; col++) {
        pixel v = pix[0];
        /* residual rows are 4 coefficients apart within the column */
        for (row = 0; row < 4; row++) {
            v += block[4*row];
            pix[(row+1)*stride] = v;
        }
        pix++;
        block++;
    }
}
1259 |
|
1260 |
/* Add a 4x4 residual to a block row-wise: within each row, residual
 * coefficients are accumulated left-to-right starting from the pixel just
 * left of the block (row DC running sum). */
static void FUNCC(pred4x4_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
    int col, row;
    pixel *pix = (pixel*)p_pix;
    const dctcoef *block = (const dctcoef*)p_block;
    stride >>= sizeof(pixel)-1;  /* byte stride -> pixel stride */
    for (row = 0; row < 4; row++) {
        pixel v = pix[-1];       /* seed from the left neighbour */
        for (col = 0; col < 4; col++) {
            v += block[col];
            pix[col] = v;
        }
        pix   += stride;
        block += 4;
    }
}
1275 |
|
1276 |
/* Add an 8x8 residual to a block column-wise: coefficients in each column
 * are accumulated top-to-bottom, seeded from the pixel above the block. */
static void FUNCC(pred8x8l_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
    int col, row;
    pixel *pix = (pixel*)p_pix;
    const dctcoef *block = (const dctcoef*)p_block;
    stride >>= sizeof(pixel)-1;  /* byte stride -> pixel stride */
    pix -= stride;               /* start from the row above the block */
    for (col = 0; col < 8; col++) {
        pixel v = pix[0];
        /* residual rows are 8 coefficients apart within the column */
        for (row = 0; row < 8; row++) {
            v += block[8*row];
            pix[(row+1)*stride] = v;
        }
        pix++;
        block++;
    }
}
1296 |
|
1297 |
/* Add an 8x8 residual to a block row-wise: coefficients in each row are
 * accumulated left-to-right, seeded from the pixel left of the block. */
static void FUNCC(pred8x8l_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
    int col, row;
    pixel *pix = (pixel*)p_pix;
    const dctcoef *block = (const dctcoef*)p_block;
    stride >>= sizeof(pixel)-1;  /* byte stride -> pixel stride */
    for (row = 0; row < 8; row++) {
        pixel v = pix[-1];       /* seed from the left neighbour */
        for (col = 0; col < 8; col++) {
            v += block[col];
            pix[col] = v;
        }
        pix   += stride;
        block += 8;
    }
}
1316 |
|
1317 |
/* Apply the 4x4 vertical residual add to each of the 16 sub-blocks of a
 * 16x16 macroblock, locating each via block_offset. */
static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int blk = 0;
    while (blk < 16) {
        FUNCC(pred4x4_vertical_add)(pix + block_offset[blk], block + blk*16*sizeof(pixel), stride);
        blk++;
    }
}
1322 |
|
1323 |
/* Apply the 4x4 horizontal residual add to each of the 16 sub-blocks of a
 * 16x16 macroblock, locating each via block_offset. */
static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int blk = 0;
    while (blk < 16) {
        FUNCC(pred4x4_horizontal_add)(pix + block_offset[blk], block + blk*16*sizeof(pixel), stride);
        blk++;
    }
}
1328 |
|
1329 |
/* Apply the 4x4 vertical residual add to each of the 4 sub-blocks of an
 * 8x8 block, locating each via block_offset. */
static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int blk = 0;
    while (blk < 4) {
        FUNCC(pred4x4_vertical_add)(pix + block_offset[blk], block + blk*16*sizeof(pixel), stride);
        blk++;
    }
}
1334 |
|
1335 |
/* Apply the 4x4 horizontal residual add to each of the 4 sub-blocks of an
 * 8x8 block, locating each via block_offset. */
static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
    int blk = 0;
    while (blk < 4) {
        FUNCC(pred4x4_horizontal_add)(pix + block_offset[blk], block + blk*16*sizeof(pixel), stride);
        blk++;
    }
}