ffmpeg / libavcodec / bfin / dsputil_bfin.c @ 8dbe5856
History | View | Annotate | Download (9.07 KB)
1 |
/*
 * BlackFin DSPUTILS
 *
 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
23 |
|
24 |
#include "libavcodec/avcodec.h" |
25 |
#include "libavcodec/dsputil.h" |
26 |
#include "dsputil_bfin.h" |
27 |
|
28 |
/* File-scope integer with external linkage. Nothing in the visible part of
 * this file reads or writes it.
 * NOTE(review): presumably referenced from another translation unit or from
 * the Blackfin assembly sources -- confirm before removing or making static. */
int off;
|
29 |
|
30 |
/**
 * IDCT-and-add entry point installed as DSPContext.idct_add.
 *
 * Runs ff_bfin_idct() on the coefficient block in place, then hands the
 * result to ff_bfin_add_pixels_clamped() together with the destination.
 *
 * @param dest      destination pixels
 * @param line_size value forwarded unchanged as the destination line size
 * @param block     coefficient block; overwritten by the in-place IDCT
 */
static void bfin_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: transform the coefficients in place. */
    ff_bfin_idct(block);

    /* Step 2: combine the transformed block with the destination. */
    ff_bfin_add_pixels_clamped(block, dest, line_size);
}
35 |
|
36 |
/**
 * IDCT-and-store entry point installed as DSPContext.idct_put.
 *
 * Runs ff_bfin_idct() on the coefficient block in place, then hands the
 * result to ff_bfin_put_pixels_clamped() together with the destination.
 *
 * @param dest      destination pixels
 * @param line_size value forwarded unchanged as the destination line size
 * @param block     coefficient block; overwritten by the in-place IDCT
 */
static void bfin_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: transform the coefficients in place. */
    ff_bfin_idct(block);

    /* Step 2: write the transformed block to the destination. */
    ff_bfin_put_pixels_clamped(block, dest, line_size);
}
41 |
|
42 |
|
43 |
/**
 * Zero the coefficient blocks starting at @p blocks using a Blackfin
 * hardware loop (effectively a fixed-size memset).
 *
 * The loop executes the store "[I0++]=R0" 192 times with R0 == 0.
 * NOTE(review): presumably each store is one 32-bit word, i.e. 768 bytes
 * = 6 blocks of 64 16-bit coefficients -- confirm against the Blackfin ISA
 * and the DCTELEM definition. The init function only installs this routine
 * when the H.264 high-bit-depth path is not in use.
 *
 * @param blocks start of the coefficient storage to clear
 */
static void bfin_clear_blocks (DCTELEM *blocks)
{
    /*
     * The asm writes through the pointer passed in %0, so it must carry a
     * "memory" clobber: without it the compiler is free to keep values of
     * blocks[] cached in registers or to move loads/stores of the buffer
     * across this statement. "volatile" is spelled out for clarity; an asm
     * without output operands is already treated as volatile by GCC.
     *
     * NOTE(review): the zero-overhead loop also uses the LC0/LT0/LB0 loop
     * registers, which are not listed as clobbers -- confirm whether the
     * toolchain requires them. The label is a fixed name, so this asm must
     * not be emitted twice in one object file.
     */
    __asm__ volatile (
        "P0=192; "
        "I0=%0; "
        "R0=0; "
        "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
        "clear_blocks_blkfn_lab:"
        "[I0++]=R0;"
        : /* no outputs */
        : "a" (blocks)
        : "P0", "I0", "R0", "memory");
}
55 |
|
56 |
|
57 |
|
58 |
/**
 * put_pixels_tab[1][0] adapter: calls ff_bfin_put_pixels8uc() with the same
 * pointer for both source operands and line_size for both stride arguments.
 * NOTE(review): presumably the helper combines its two sources, so identical
 * sources amount to a plain copy -- confirm against the assembly.
 */
static void bfin_put_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels8uc(block, src_a, src_b, line_size, line_size, h);
}
62 |
|
63 |
/**
 * put_pixels_tab[1][1] adapter: calls ff_bfin_put_pixels8uc() with the
 * sources pixels and pixels + 1 (the byte to the right), using line_size
 * for both stride arguments.
 */
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc(block, pixels, right, line_size, line_size, h);
}
67 |
|
68 |
/**
 * put_pixels_tab[1][2] adapter: calls ff_bfin_put_pixels8uc() with the
 * sources pixels and pixels + line_size (one line further on), using
 * line_size for both stride arguments.
 */
static void bfin_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc(block, pixels, below, line_size, line_size, h);
}
72 |
|
73 |
/**
 * put_pixels_tab[1][3] adapter: forwards to ff_bfin_z_put_pixels8_xy2(),
 * passing line_size for both of the helper's stride arguments.
 */
static void bfin_put_pixels8_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int stride = line_size;

    ff_bfin_z_put_pixels8_xy2(block, s0, stride, stride, h);
}
77 |
|
78 |
/**
 * put_pixels_tab[0][0] adapter: calls ff_bfin_put_pixels16uc() with the same
 * pointer for both source operands and line_size for both stride arguments.
 * NOTE(review): presumably the helper combines its two sources, so identical
 * sources amount to a plain copy -- confirm against the assembly.
 */
static void bfin_put_pixels16(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels16uc(block, src_a, src_b, line_size, line_size, h);
}
82 |
|
83 |
/**
 * put_pixels_tab[0][1] adapter: calls ff_bfin_put_pixels16uc() with the
 * sources pixels and pixels + 1 (the byte to the right), using line_size
 * for both stride arguments.
 */
static void bfin_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc(block, pixels, right, line_size, line_size, h);
}
87 |
|
88 |
/**
 * put_pixels_tab[0][2] adapter: calls ff_bfin_put_pixels16uc() with the
 * sources pixels and pixels + line_size (one line further on), using
 * line_size for both stride arguments.
 */
static void bfin_put_pixels16_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc(block, pixels, below, line_size, line_size, h);
}
92 |
|
93 |
/**
 * put_pixels_tab[0][3] adapter: forwards to ff_bfin_z_put_pixels16_xy2(),
 * passing line_size for both of the helper's stride arguments.
 */
static void bfin_put_pixels16_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int stride = line_size;

    ff_bfin_z_put_pixels16_xy2(block, s0, stride, stride, h);
}
97 |
|
98 |
/**
 * put_no_rnd_pixels_tab[1][0] adapter: calls ff_bfin_put_pixels8uc_nornd()
 * with the same pointer for both source operands.
 */
static void bfin_put_pixels8_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels8uc_nornd(block, src_a, src_b, line_size, h);
}
102 |
|
103 |
/**
 * put_no_rnd_pixels_tab[1][1] adapter: calls ff_bfin_put_pixels8uc_nornd()
 * with the sources pixels and pixels + 1 (the byte to the right).
 */
static void bfin_put_pixels8_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc_nornd(block, pixels, right, line_size, h);
}
107 |
|
108 |
/**
 * put_no_rnd_pixels_tab[1][2] adapter: calls ff_bfin_put_pixels8uc_nornd()
 * with the sources pixels and pixels + line_size (one line further on).
 */
static void bfin_put_pixels8_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc_nornd(block, pixels, below, line_size, h);
}
112 |
|
113 |
|
114 |
/**
 * put_no_rnd_pixels_tab[0][0] adapter: calls ff_bfin_put_pixels16uc_nornd()
 * with the same pointer for both source operands.
 */
static void bfin_put_pixels16_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels16uc_nornd(block, src_a, src_b, line_size, h);
}
118 |
|
119 |
/**
 * put_no_rnd_pixels_tab[0][1] adapter: calls ff_bfin_put_pixels16uc_nornd()
 * with the sources pixels and pixels + 1 (the byte to the right).
 */
static void bfin_put_pixels16_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc_nornd(block, pixels, right, line_size, h);
}
123 |
|
124 |
/**
 * put_no_rnd_pixels_tab[0][2] adapter: calls ff_bfin_put_pixels16uc_nornd()
 * with the sources pixels and pixels + line_size (one line further on).
 */
static void bfin_put_pixels16_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc_nornd(block, pixels, below, line_size, h);
}
128 |
|
129 |
/**
 * Comparison callback installed as sad[0] and pix_abs[0][0].
 *
 * Forwards both blocks to ff_bfin_z_sad16x16(), using line_size as the
 * stride argument for each of them, and returns the helper's result.
 *
 * @param c unused context pointer required by the callback signature
 */
static int bfin_pix_abs16(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int result = ff_bfin_z_sad16x16(blk1, blk2, line_size, line_size, h);

    return result;
}
133 |
|
134 |
/**
 * Compares blk1 with the line that follows it by calling
 * ff_bfin_z_sad16x16() on blk1 and blk1 + stride, with a doubled stride
 * for both operands.
 *
 * The init function in this file leaves the assignment of this routine to
 * vsad[4] commented out.
 *
 * @param c     unused context pointer required by the callback signature
 * @param dummy unused second block
 */
static int bfin_vsad_intra16(void *c, uint8_t *blk1, uint8_t *dummy, int stride, int h)
{
    uint8_t *next_line = blk1 + stride;
    const int step     = stride << 1;

    return ff_bfin_z_sad16x16(blk1, next_line, step, step, h);
}
137 |
|
138 |
/**
 * Returns the sum of two ff_bfin_z_sad16x16() results: blk1 against
 * blk1 + stride, and blk2 against blk2 + stride, each with a doubled stride.
 *
 * The init function in this file leaves the assignment of this routine to
 * vsad[0] commented out.
 *
 * @param c unused context pointer required by the callback signature
 */
static int bfin_vsad(void *c, uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    const int step   = stride << 1;
    const int first  = ff_bfin_z_sad16x16(blk1, blk1 + stride, step, step, h);
    const int second = ff_bfin_z_sad16x16(blk2, blk2 + stride, step, step, h);

    return first + second;
}
142 |
|
143 |
/* Scratch buffer for the interpolated reference block built by the
 * bfin_pix_abs{16,8}_{x2,y2,xy2} routines before they compute the SAD.
 * Those routines write it with a line stride of 16 or 8 bytes and h lines.
 * NOTE(review): nothing here bounds h (256 bytes hold 16 lines of 16 bytes),
 * and the single shared buffer means concurrent callers would overwrite each
 * other -- confirm both against the callers. attribute_l1_data_b presumably
 * selects an on-chip L1 data bank; see its definition. */
static uint8_t vtmp_blk[256] attribute_l1_data_b;
144 |
|
145 |
static int bfin_pix_abs16_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
146 |
{ |
147 |
ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+1, 16, line_size, h); |
148 |
return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); |
149 |
} |
150 |
|
151 |
static int bfin_pix_abs16_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
152 |
{ |
153 |
ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+line_size, 16, line_size, h);
|
154 |
return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); |
155 |
} |
156 |
|
157 |
static int bfin_pix_abs16_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
158 |
{ |
159 |
ff_bfin_z_put_pixels16_xy2 (vtmp_blk, blk2, 16, line_size, h);
|
160 |
return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h); |
161 |
} |
162 |
|
163 |
/**
 * Comparison callback installed as sad[1] and pix_abs[1][0].
 *
 * Forwards both blocks to ff_bfin_z_sad8x8(), using line_size as the stride
 * argument for each of them, and returns the helper's result.
 *
 * @param c unused context pointer required by the callback signature
 */
static int bfin_pix_abs8(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    const int result = ff_bfin_z_sad8x8(blk1, blk2, line_size, line_size, h);

    return result;
}
167 |
|
168 |
static int bfin_pix_abs8_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
169 |
{ |
170 |
ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+1, 8, line_size, h); |
171 |
return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); |
172 |
} |
173 |
|
174 |
static int bfin_pix_abs8_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
175 |
{ |
176 |
ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+line_size, 8, line_size, h);
|
177 |
return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); |
178 |
} |
179 |
|
180 |
static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h) |
181 |
{ |
182 |
ff_bfin_z_put_pixels8_xy2 (vtmp_blk, blk2, 8, line_size, h);
|
183 |
return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h); |
184 |
} |
185 |
|
186 |
|
187 |
/*
  decoder optimization
  start on 2/11 100 frames of 352x240@25 compiled with no optimization -g debugging
  9.824s ~ 2.44x off
  6.360s ~ 1.58x off with -O2
  5.740s ~ 1.43x off with idcts

  2.64s 2/20 same sman.mp4 decode only

*/
|
197 |
|
198 |
/**
 * Install the Blackfin implementations into a DSPContext.
 *
 * Only the function pointers assigned below are touched; every other member
 * of @p c is left as the caller set it up.
 *
 * @param c     DSP context whose function pointers are overridden
 * @param avctx codec context; codec_id, bits_per_raw_sample, dct_algo and
 *              idct_algo are read to decide which routines to install
 */
void dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx)
{
    /* Non-zero for H.264 with more than 8 bits per raw sample; the
     * clear_blocks and put_pixels overrides are skipped in that case. */
    const int high_depth = avctx->codec_id == CODEC_ID_H264 &&
                           avctx->bits_per_raw_sample > 8;
    const int dct_algo   = avctx->dct_algo;
    const int idct_algo  = avctx->idct_algo;

    /* Pixel fetch / store primitives. */
    c->get_pixels         = ff_bfin_get_pixels;
    c->diff_pixels        = ff_bfin_diff_pixels;
    c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
    c->add_pixels_clamped = ff_bfin_add_pixels_clamped;

    /* Block statistics. */
    c->pix_sum   = ff_bfin_pix_sum;
    c->pix_norm1 = ff_bfin_pix_norm1;

    /* Sum of absolute differences. */
    c->sad[0] = bfin_pix_abs16;
    c->sad[1] = bfin_pix_abs8;

    /* vsad[0] (bfin_vsad) and vsad[4] (bfin_vsad_intra16) are deliberately
     * not installed. */

    /* TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = bfin_pix_abs16;
    c->pix_abs[0][1] = bfin_pix_abs16_x2;
    c->pix_abs[0][2] = bfin_pix_abs16_y2;
    c->pix_abs[0][3] = bfin_pix_abs16_xy2;

    c->pix_abs[1][0] = bfin_pix_abs8;
    c->pix_abs[1][1] = bfin_pix_abs8_x2;
    c->pix_abs[1][2] = bfin_pix_abs8_y2;
    c->pix_abs[1][3] = bfin_pix_abs8_xy2;

    /* Sum of squared errors. */
    c->sse[0] = ff_bfin_sse16;
    c->sse[1] = ff_bfin_sse8;
    c->sse[2] = ff_bfin_sse4;

    if (!high_depth) {
        c->clear_blocks = bfin_clear_blocks;

        c->put_pixels_tab[0][0] = bfin_put_pixels16;
        c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
        c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
        c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2;

        c->put_pixels_tab[1][0] = bfin_put_pixels8;
        c->put_pixels_tab[1][1] = bfin_put_pixels8_x2;
        c->put_pixels_tab[1][2] = bfin_put_pixels8_y2;
        c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2;

        /* The [..][3] (xy2) no-rounding entries are not overridden for
         * either block size. */
        c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd;
        c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
        c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;

        c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd;
        c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd;
        c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd;
    }

    /* The forward DCT is replaced only when the caller asked for "auto". */
    if (dct_algo == FF_DCT_AUTO) {
        c->fdct = ff_bfin_fdct;
    }

    /* Inverse DCT: an explicit VP3 request gets the VP3 routines, "auto"
     * gets the generic Blackfin IDCT, anything else is left untouched. */
    if (idct_algo == FF_IDCT_VP3) {
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        c->idct                  = ff_bfin_vp3_idct;
        c->idct_add              = ff_bfin_vp3_idct_add;
        c->idct_put              = ff_bfin_vp3_idct_put;
    } else if (idct_algo == FF_IDCT_AUTO) {
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        c->idct                  = ff_bfin_idct;
        c->idct_add              = bfin_idct_add;
        c->idct_put              = bfin_idct_put;
    }
}
271 |
|
272 |
|
273 |
|