ffmpeg / libavcodec / bfin / dsputil_bfin.c @ b550bfaa
History | View | Annotate | Download (10.6 KB)
1 |
/*
 * BlackFin DSPUTILS
 *
 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
23 |
|
24 |
#include <unistd.h> |
25 |
#include <bits/bfin_sram.h> |
26 |
#include "avcodec.h" |
27 |
#include "dsputil.h" |
28 |
|
29 |
/*
 * L1CODE tags a declaration with the Blackfin GCC `l1_text` attribute while
 * USE_L1CODE is defined, and expands to nothing otherwise.
 */
#define USE_L1CODE

#if defined(USE_L1CODE)
#  define L1CODE __attribute__ ((l1_text))
#else
#  define L1CODE
#endif

/*
 * Not referenced anywhere in the visible part of this file.
 * NOTE(review): presumably consumed by another translation unit or by the
 * assembly sources -- confirm before touching it.
 */
int off;
|
37 |
|
38 |
|
39 |
extern void ff_bfin_idct (DCTELEM *block) L1CODE; |
40 |
extern void ff_bfin_fdct (DCTELEM *block) L1CODE; |
41 |
extern void ff_bfin_add_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) L1CODE; |
42 |
extern void ff_bfin_put_pixels_clamped (DCTELEM *block, uint8_t *dest, int line_size) L1CODE; |
43 |
extern void ff_bfin_diff_pixels (DCTELEM *block, uint8_t *s1, uint8_t *s2, int stride) L1CODE; |
44 |
extern void ff_bfin_get_pixels (DCTELEM *restrict block, const uint8_t *pixels, int line_size) L1CODE; |
45 |
extern int ff_bfin_pix_norm1 (uint8_t * pix, int line_size) L1CODE; |
46 |
extern int ff_bfin_z_sad8x8 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) L1CODE; |
47 |
extern int ff_bfin_z_sad16x16 (uint8_t *blk1, uint8_t *blk2, int dsz, int line_size, int h) L1CODE; |
48 |
|
49 |
extern void ff_bfin_z_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) L1CODE; |
50 |
extern void ff_bfin_z_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int dest_size, int line_size, int h) L1CODE; |
51 |
extern void ff_bfin_put_pixels16_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) L1CODE; |
52 |
extern void ff_bfin_put_pixels8_xy2_nornd (uint8_t *block, const uint8_t *s0, int line_size, int h) L1CODE; |
53 |
|
54 |
|
55 |
extern int ff_bfin_pix_sum (uint8_t *p, int stride) L1CODE; |
56 |
|
57 |
extern void ff_bfin_put_pixels8uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) L1CODE; |
58 |
extern void ff_bfin_put_pixels16uc (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int dest_size, int line_size, int h) L1CODE; |
59 |
extern void ff_bfin_put_pixels8uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) L1CODE; |
60 |
extern void ff_bfin_put_pixels16uc_nornd (uint8_t *block, const uint8_t *s0, const uint8_t *s1, int line_size, int h) L1CODE; |
61 |
|
62 |
extern int ff_bfin_sse4 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE; |
63 |
extern int ff_bfin_sse8 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE; |
64 |
extern int ff_bfin_sse16 (void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) L1CODE; |
65 |
|
66 |
|
67 |
/*
 * Run ff_bfin_idct() on `block`, then pass the same block on to
 * ff_bfin_add_pixels_clamped() together with `dest` and `line_size`.
 */
static void bfin_idct_add (uint8_t *dest, int line_size, DCTELEM *block)
{
    DCTELEM *const coeffs = block;

    ff_bfin_idct (coeffs);
    ff_bfin_add_pixels_clamped (coeffs, dest, line_size);
}
72 |
|
73 |
/*
 * Run ff_bfin_idct() on `block`, then pass the same block on to
 * ff_bfin_put_pixels_clamped() together with `dest` and `line_size`.
 */
static void bfin_idct_put (uint8_t *dest, int line_size, DCTELEM *block)
{
    DCTELEM *const coeffs = block;

    ff_bfin_idct (coeffs);
    ff_bfin_put_pixels_clamped (coeffs, dest, line_size);
}
78 |
|
79 |
|
80 |
/*
 * Zero the coefficient storage starting at `blocks`.
 *
 * This is a hand-written memset: a zero-overhead hardware loop issues 192
 * stores of R0 (which holds 0) through I0 with post-increment.
 * NOTE(review): 192 32-bit stores are 768 bytes, i.e. six 64-entry blocks
 * if DCTELEM is 16 bits wide -- confirm against the DCTELEM typedef.
 *
 * The "memory" clobber tells the compiler that the asm writes memory it
 * cannot see through the operand list; without it, accesses to the buffer
 * could be cached or moved across the statement.  `volatile` makes explicit
 * that the statement must not be removed.
 *
 * NOTE(review): the loop label is a plain global symbol, so the statement
 * must not be emitted more than once per object file.
 */
static void bfin_clear_blocks (DCTELEM *blocks)
{
    __asm__ volatile (
        "P0=192; "
        "I0=%0;  "
        "R0=0;   "
        "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
        "clear_blocks_blkfn_lab:"
        "[I0++]=R0;"
        : /* no outputs */
        : "a" (blocks)
        : "P0", "I0", "R0", "memory");
}
92 |
|
93 |
|
94 |
|
95 |
/*
 * 8-wide put without half-pel offset: hands `pixels` to
 * ff_bfin_put_pixels8uc() as both sources, using `line_size` for the
 * destination size and the source line size alike.
 * NOTE(review): presumably the routine averages its two sources, so equal
 * sources amount to a copy -- confirm against the assembly.
 */
static void bfin_put_pixels8 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const src = pixels;

    ff_bfin_put_pixels8uc (block, src, src, line_size, line_size, h);
}
99 |
|
100 |
/*
 * 8-wide put, horizontal half-pel position: the second source passed to
 * ff_bfin_put_pixels8uc() is `pixels` shifted one byte to the right.
 */
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels8uc (block, pixels, right, line_size, line_size, h);
}
104 |
|
105 |
/*
 * 8-wide put, vertical half-pel position: the second source passed to
 * ff_bfin_put_pixels8uc() is `pixels` advanced by one line (`line_size`).
 */
static void bfin_put_pixels8_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels8uc (block, pixels, below, line_size, line_size, h);
}
109 |
|
110 |
/*
 * 8-wide put, diagonal half-pel position: forwards to
 * ff_bfin_z_put_pixels8_xy2() with `line_size` used for both the
 * destination size and the source line size.
 */
static void bfin_put_pixels8_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int stride = line_size;

    ff_bfin_z_put_pixels8_xy2 (block, s0, stride, stride, h);
}
114 |
|
115 |
/*
 * 16-wide put without half-pel offset: hands `pixels` to
 * ff_bfin_put_pixels16uc() as both sources, using `line_size` for the
 * destination size and the source line size alike.
 * NOTE(review): presumably the routine averages its two sources, so equal
 * sources amount to a copy -- confirm against the assembly.
 */
static void bfin_put_pixels16 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const src = pixels;

    ff_bfin_put_pixels16uc (block, src, src, line_size, line_size, h);
}
119 |
|
120 |
/*
 * 16-wide put, horizontal half-pel position: the second source passed to
 * ff_bfin_put_pixels16uc() is `pixels` shifted one byte to the right.
 */
static void bfin_put_pixels16_x2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels16uc (block, pixels, right, line_size, line_size, h);
}
124 |
|
125 |
/*
 * 16-wide put, vertical half-pel position: the second source passed to
 * ff_bfin_put_pixels16uc() is `pixels` advanced by one line (`line_size`).
 */
static void bfin_put_pixels16_y2 (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels16uc (block, pixels, below, line_size, line_size, h);
}
129 |
|
130 |
/*
 * 16-wide put, diagonal half-pel position: forwards to
 * ff_bfin_z_put_pixels16_xy2() with `line_size` used for both the
 * destination size and the source line size.
 */
static void bfin_put_pixels16_xy2 (uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int stride = line_size;

    ff_bfin_z_put_pixels16_xy2 (block, s0, stride, stride, h);
}
134 |
|
135 |
/*
 * 8-wide "no rounding" put without half-pel offset: hands `pixels` to
 * ff_bfin_put_pixels8uc_nornd() as both sources.
 *
 * NOTE(review): unlike most of its siblings this function has external
 * linkage; check for users outside this file before making it static.
 */
void bfin_put_pixels8_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const src = pixels;

    ff_bfin_put_pixels8uc_nornd (block, src, src, line_size, h);
}
139 |
|
140 |
/*
 * 8-wide "no rounding" put, horizontal half-pel position: the second source
 * passed to ff_bfin_put_pixels8uc_nornd() is `pixels` shifted one byte right.
 */
static void bfin_put_pixels8_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels8uc_nornd (block, pixels, right, line_size, h);
}
144 |
|
145 |
/*
 * 8-wide "no rounding" put, vertical half-pel position: the second source
 * passed to ff_bfin_put_pixels8uc_nornd() is `pixels` advanced by one line.
 */
static void bfin_put_pixels8_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels8uc_nornd (block, pixels, below, line_size, h);
}
149 |
|
150 |
|
151 |
/*
 * 16-wide "no rounding" put without half-pel offset: hands `pixels` to
 * ff_bfin_put_pixels16uc_nornd() as both sources.
 *
 * NOTE(review): unlike most of its siblings this function has external
 * linkage; check for users outside this file before making it static.
 */
void bfin_put_pixels16_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const src = pixels;

    ff_bfin_put_pixels16uc_nornd (block, src, src, line_size, h);
}
155 |
|
156 |
/*
 * 16-wide "no rounding" put, horizontal half-pel position: the second source
 * passed to ff_bfin_put_pixels16uc_nornd() is `pixels` shifted one byte right.
 */
static void bfin_put_pixels16_x2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const right = pixels + 1;

    ff_bfin_put_pixels16uc_nornd (block, pixels, right, line_size, h);
}
160 |
|
161 |
/*
 * 16-wide "no rounding" put, vertical half-pel position: the second source
 * passed to ff_bfin_put_pixels16uc_nornd() is `pixels` advanced by one line.
 */
static void bfin_put_pixels16_y2_nornd (uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *const below = pixels + line_size;

    ff_bfin_put_pixels16uc_nornd (block, pixels, below, line_size, h);
}
165 |
|
166 |
/*
 * 16-wide comparison of blk1 against blk2 via ff_bfin_z_sad16x16(); both
 * blocks are described by the same `line_size`.  The context pointer `c`
 * is not used.
 */
static int bfin_pix_abs16 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int stride = line_size;

    return ff_bfin_z_sad16x16 (blk1, blk2, stride, stride, h);
}
170 |
|
171 |
/*
 * Scratch block shared by the half-pel bfin_pix_abs*_x2/_y2/_xy2 helpers:
 * they write an intermediate block here and then compare against it.
 * 256 bytes is 16 rows at a 16-byte stride.  Being a single static buffer,
 * it is shared by every caller of those helpers.
 * The attribute requests placement in Blackfin L1 data B memory.
 */
static uint8_t vtmp_blk[16 * 16] __attribute__ ((l1_data_B));
172 |
|
173 |
static int bfin_pix_abs16_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
174 |
{ |
175 |
ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+1, 16, line_size, h); |
176 |
return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); |
177 |
} |
178 |
|
179 |
static int bfin_pix_abs16_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
180 |
{ |
181 |
ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+line_size, 16, line_size, h);
|
182 |
return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); |
183 |
} |
184 |
|
185 |
static int bfin_pix_abs16_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
186 |
{ |
187 |
ff_bfin_z_put_pixels16_xy2 (vtmp_blk, blk2, 16, line_size, h);
|
188 |
return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); |
189 |
} |
190 |
|
191 |
/*
 * 8-wide comparison of blk1 against blk2 via ff_bfin_z_sad8x8(); both
 * blocks are described by the same `line_size`.  The context pointer `c`
 * is not used.
 */
static int bfin_pix_abs8 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int stride = line_size;

    return ff_bfin_z_sad8x8 (blk1, blk2, stride, stride, h);
}
195 |
|
196 |
static int bfin_pix_abs8_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
197 |
{ |
198 |
ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+1, 8, line_size, h); |
199 |
return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); |
200 |
} |
201 |
|
202 |
static int bfin_pix_abs8_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
203 |
{ |
204 |
ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+line_size, 8, line_size, h);
|
205 |
return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); |
206 |
} |
207 |
|
208 |
static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
209 |
{ |
210 |
ff_bfin_z_put_pixels8_xy2 (vtmp_blk, blk2, 8, line_size, h);
|
211 |
return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); |
212 |
} |
213 |
|
214 |
|
215 |
/*
  decoder optimization
  start on 2/11 100 frames of 352x240@25 compiled with no optimization -g debugging
  9.824s ~ 2.44x off
  6.360s ~ 1.58x off with -O2
  5.740s ~ 1.43x off with idcts

  2.64s    2/20 same sman.mp4 decode only

*/
|
225 |
|
226 |
/**
 * Install the Blackfin implementations into a DSPContext.
 *
 * Every assignment is unconditional.
 *
 * @param c     context whose function pointers are overwritten
 * @param avctx codec context; not consulted by this function
 */
void dsputil_init_bfin( DSPContext* c, AVCodecContext *avctx )
{
    /* Pixel <-> coefficient block transfers. */
    c->get_pixels         = ff_bfin_get_pixels;
    c->diff_pixels        = ff_bfin_diff_pixels;
    c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
    c->add_pixels_clamped = ff_bfin_add_pixels_clamped;

    /* Block clearing and per-block statistics. */
    c->clear_blocks = bfin_clear_blocks;
    c->pix_sum      = ff_bfin_pix_sum;
    c->pix_norm1    = ff_bfin_pix_norm1;

    /* Comparison functions: index 0 is the 16-wide variant, 1 the 8-wide. */
    c->sad[0] = bfin_pix_abs16;
    c->sad[1] = bfin_pix_abs8;

    /* pix_abs[size][pos]: size 0 -> 16, 1 -> 8; pos 0 plain, 1 x2, 2 y2, 3 xy2. */
    c->pix_abs[0][0] = bfin_pix_abs16;
    c->pix_abs[0][1] = bfin_pix_abs16_x2;
    c->pix_abs[0][2] = bfin_pix_abs16_y2;
    c->pix_abs[0][3] = bfin_pix_abs16_xy2;

    c->pix_abs[1][0] = bfin_pix_abs8;
    c->pix_abs[1][1] = bfin_pix_abs8_x2;
    c->pix_abs[1][2] = bfin_pix_abs8_y2;
    c->pix_abs[1][3] = bfin_pix_abs8_xy2;

    /* sse[]: 16-, 8- and 4-wide variants, in that order. */
    c->sse[0] = ff_bfin_sse16;
    c->sse[1] = ff_bfin_sse8;
    c->sse[2] = ff_bfin_sse4;

    /**
     * Halfpel motion compensation with rounding (a+b+1)>>1.
     * Only the two block widths handled here are filled in, each with its
     * 4 halfpel positions:
     * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    c->put_pixels_tab[0][0] = bfin_put_pixels16;
    c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
    c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
    c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2;

    c->put_pixels_tab[1][0] = bfin_put_pixels8;
    c->put_pixels_tab[1][1] = bfin_put_pixels8_x2;
    c->put_pixels_tab[1][2] = bfin_put_pixels8_y2;
    c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2;

    /* "No rounding" counterparts; the xy2 slots use the ff_bfin_* routines directly. */
    c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd;
    c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd;
    c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd;
    c->put_no_rnd_pixels_tab[1][3] = ff_bfin_put_pixels8_xy2_nornd;

    c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd;
    c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
    c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;
    c->put_no_rnd_pixels_tab[0][3] = ff_bfin_put_pixels16_xy2_nornd;

    /* Forward / inverse transforms and the combined idct+store helpers. */
    c->fdct     = ff_bfin_fdct;
    c->idct     = ff_bfin_idct;
    c->idct_add = bfin_idct_add;
    c->idct_put = bfin_idct_put;
}
293 |
|
294 |
|
295 |
|