Statistics
| Branch: | Revision:

ffmpeg / libavcodec / bfin / dsputil_bfin.c @ 2912e87a

History | View | Annotate | Download (8.91 KB)

1
/*
2
 * BlackFin DSPUTILS
3
 *
4
 * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
5
 * Copyright (c) 2006 Michael Benjamin <michael.benjamin@analog.com>
6
 *
7
 * This file is part of Libav.
8
 *
9
 * Libav is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * Libav is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with Libav; if not, write to the Free Software
21
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
 */
23

    
24
#include "libavcodec/avcodec.h"
25
#include "libavcodec/dsputil.h"
26
#include "dsputil_bfin.h"
27

    
28
/* NOTE(review): not referenced anywhere in the visible part of this file;
 * presumably consumed by other Blackfin code — confirm before touching it. */
int off;
29

    
30
/**
 * IDCT-and-add entry point installed as DSPContext.idct_add.
 *
 * Runs ff_bfin_idct() on block, then hands the same block to
 * ff_bfin_add_pixels_clamped() together with dest and line_size.
 * The order matters: the second call consumes what the first one produced.
 *
 * @param dest      destination pixels
 * @param line_size passed through unchanged to ff_bfin_add_pixels_clamped()
 * @param block     coefficient block, transformed in place by ff_bfin_idct()
 */
static void bfin_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: transform the coefficients in place. */
    ff_bfin_idct(block);

    /* Step 2: combine the transformed block with the destination. */
    ff_bfin_add_pixels_clamped(block, dest, line_size);
}
35

    
36
/**
 * IDCT-and-put entry point installed as DSPContext.idct_put.
 *
 * Runs ff_bfin_idct() on block, then hands the same block to
 * ff_bfin_put_pixels_clamped() together with dest and line_size.
 *
 * @param dest      destination pixels
 * @param line_size passed through unchanged to ff_bfin_put_pixels_clamped()
 * @param block     coefficient block, transformed in place by ff_bfin_idct()
 */
static void bfin_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    /* Step 1: transform the coefficients in place. */
    ff_bfin_idct(block);

    /* Step 2: write the transformed block to the destination. */
    ff_bfin_put_pixels_clamped(block, dest, line_size);
}
41

    
42

    
43
/**
 * Zero the coefficient storage starting at blocks (a hand-written memset).
 *
 * Loads 192 into P0, the destination address into I0 and 0 into R0, then
 * uses LSETUP on loop counter LC0 to repeat the single store "[I0++]=R0"
 * P0 times.
 * NOTE(review): presumably each store is one 32-bit word, i.e. 768 bytes are
 * cleared in total — confirm against the DCTELEM size and block count.
 *
 * @param blocks start of the region to clear
 */
static void bfin_clear_blocks (DCTELEM *blocks)
{
    /*
     * The asm writes to memory through I0, which the compiler cannot see
     * from the operand list.  The "memory" clobber tells it so, preventing
     * loads/stores to the cleared region from being cached in registers or
     * moved across this statement.  __volatile__ makes explicit that the
     * statement must not be dropped even though it has no outputs.
     *
     * NOTE(review): the loop label is a plain (non-unique) assembler symbol;
     * it would clash if this asm were ever emitted more than once in the
     * same translation unit.
     */
    __asm__ __volatile__(
        "P0=192; "
        "I0=%0;  "
        "R0=0;   "
        "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
        "clear_blocks_blkfn_lab:"
        "[I0++]=R0;"
        :
        : "a" (blocks)
        : "P0", "I0", "R0", "memory");
}
55

    
56

    
57

    
58
/**
 * put_pixels_tab[1][0] entry: forwards to ff_bfin_put_pixels8uc() with
 * pixels used for both source arguments and line_size used for both
 * stride-like arguments.
 * NOTE(review): with two identical sources the helper presumably reduces to
 * a plain copy — confirm against the assembly.
 */
static void bfin_put_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels8uc(block, src_a, src_b, line_size, line_size, h);
}
62

    
63
/**
 * put_pixels_tab[1][1] entry: forwards to ff_bfin_put_pixels8uc() with the
 * two sources set to pixels and pixels + 1 (one byte to the right).
 * line_size is used for both stride-like arguments.
 * NOTE(review): the helper presumably averages the two sources — confirm.
 */
static void bfin_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc(block, pixels, right, line_size, line_size, h);
}
67

    
68
/**
 * put_pixels_tab[1][2] entry: forwards to ff_bfin_put_pixels8uc() with the
 * two sources set to pixels and pixels + line_size (one line_size further).
 * line_size is used for both stride-like arguments.
 * NOTE(review): the helper presumably averages the two sources — confirm.
 */
static void bfin_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc(block, pixels, below, line_size, line_size, h);
}
72

    
73
/**
 * put_pixels_tab[1][3] entry: forwards to ff_bfin_z_put_pixels8_xy2() with
 * a single source pointer and line_size used for both stride-like arguments.
 */
static void bfin_put_pixels8_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int dst_step = line_size;
    const int src_step = line_size;

    ff_bfin_z_put_pixels8_xy2(block, s0, dst_step, src_step, h);
}
77

    
78
/**
 * put_pixels_tab[0][0] entry: forwards to ff_bfin_put_pixels16uc() with
 * pixels used for both source arguments and line_size used for both
 * stride-like arguments.
 * NOTE(review): with two identical sources the helper presumably reduces to
 * a plain copy — confirm against the assembly.
 */
static void bfin_put_pixels16(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels16uc(block, src_a, src_b, line_size, line_size, h);
}
82

    
83
/**
 * put_pixels_tab[0][1] entry: forwards to ff_bfin_put_pixels16uc() with the
 * two sources set to pixels and pixels + 1 (one byte to the right).
 * line_size is used for both stride-like arguments.
 * NOTE(review): the helper presumably averages the two sources — confirm.
 */
static void bfin_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc(block, pixels, right, line_size, line_size, h);
}
87

    
88
/**
 * put_pixels_tab[0][2] entry: forwards to ff_bfin_put_pixels16uc() with the
 * two sources set to pixels and pixels + line_size (one line_size further).
 * line_size is used for both stride-like arguments.
 * NOTE(review): the helper presumably averages the two sources — confirm.
 */
static void bfin_put_pixels16_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc(block, pixels, below, line_size, line_size, h);
}
92

    
93
/**
 * put_pixels_tab[0][3] entry: forwards to ff_bfin_z_put_pixels16_xy2() with
 * a single source pointer and line_size used for both stride-like arguments.
 */
static void bfin_put_pixels16_xy2(uint8_t *block, const uint8_t *s0, int line_size, int h)
{
    const int dst_step = line_size;
    const int src_step = line_size;

    ff_bfin_z_put_pixels16_xy2(block, s0, dst_step, src_step, h);
}
97

    
98
/**
 * put_no_rnd_pixels_tab[1][0] entry: forwards to
 * ff_bfin_put_pixels8uc_nornd() with pixels used for both source arguments.
 * Unlike the rounding helper, this one takes line_size only once.
 */
static void bfin_put_pixels8_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels8uc_nornd(block, src_a, src_b, line_size, h);
}
102

    
103
/**
 * put_no_rnd_pixels_tab[1][1] entry: forwards to
 * ff_bfin_put_pixels8uc_nornd() with the two sources set to pixels and
 * pixels + 1 (one byte to the right).
 */
static void bfin_put_pixels8_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels8uc_nornd(block, pixels, right, line_size, h);
}
107

    
108
/**
 * put_no_rnd_pixels_tab[1][2] entry: forwards to
 * ff_bfin_put_pixels8uc_nornd() with the two sources set to pixels and
 * pixels + line_size (one line_size further).
 */
static void bfin_put_pixels8_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels8uc_nornd(block, pixels, below, line_size, h);
}
112

    
113

    
114
/**
 * put_no_rnd_pixels_tab[0][0] entry: forwards to
 * ff_bfin_put_pixels16uc_nornd() with pixels used for both source arguments.
 * Unlike the rounding helper, this one takes line_size only once.
 */
static void bfin_put_pixels16_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *src_a = pixels;
    const uint8_t *src_b = pixels;

    ff_bfin_put_pixels16uc_nornd(block, src_a, src_b, line_size, h);
}
118

    
119
/**
 * put_no_rnd_pixels_tab[0][1] entry: forwards to
 * ff_bfin_put_pixels16uc_nornd() with the two sources set to pixels and
 * pixels + 1 (one byte to the right).
 */
static void bfin_put_pixels16_x2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *right = pixels + 1;

    ff_bfin_put_pixels16uc_nornd(block, pixels, right, line_size, h);
}
123

    
124
/**
 * put_no_rnd_pixels_tab[0][2] entry: forwards to
 * ff_bfin_put_pixels16uc_nornd() with the two sources set to pixels and
 * pixels + line_size (one line_size further).
 */
static void bfin_put_pixels16_y2_nornd(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    const uint8_t *below = pixels + line_size;

    ff_bfin_put_pixels16uc_nornd(block, pixels, below, line_size, h);
}
128

    
129
/**
 * sad[0] / pix_abs[0][0] entry: returns ff_bfin_z_sad16x16() of blk1 and
 * blk2, with line_size used as the stride-like argument for both blocks.
 * The context pointer c is not used.
 */
static int bfin_pix_abs16(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    int result = ff_bfin_z_sad16x16(blk1, blk2, line_size, line_size, h);

    return result;
}
133

    
134
/**
 * Calls ff_bfin_z_sad16x16() on blk1 against blk1 + stride, using twice the
 * stride (stride << 1) as the stride-like argument for both operands.
 * The c and dummy parameters are not used.
 *
 * Not currently installed: the matching vsad[4] assignment in
 * dsputil_init_bfin() is commented out.
 */
static int bfin_vsad_intra16(void *c, uint8_t *blk1, uint8_t *dummy, int stride, int h)
{
    uint8_t *next_row  = blk1 + stride;
    const int two_rows = stride << 1;

    return ff_bfin_z_sad16x16(blk1, next_row, two_rows, two_rows, h);
}
137

    
138
/**
 * Sum of two ff_bfin_z_sad16x16() calls: one on blk1 against blk1 + stride
 * and one on blk2 against blk2 + stride, each using twice the stride
 * (stride << 1) as the stride-like argument for both operands.
 * The context pointer c is not used.
 *
 * Not currently installed: the matching vsad[0] assignment in
 * dsputil_init_bfin() is commented out.
 */
static int bfin_vsad(void *c, uint8_t *blk1, uint8_t *blk2, int stride, int h)
{
    const int two_rows = stride << 1;
    int total;

    total  = ff_bfin_z_sad16x16(blk1, blk1 + stride, two_rows, two_rows, h);
    total += ff_bfin_z_sad16x16(blk2, blk2 + stride, two_rows, two_rows, h);

    return total;
}
142

    
143
/* 256-byte scratch block shared by the bfin_pix_abs{16,8}_{x2,y2,xy2} helpers:
 * each one writes an intermediate block here (with a row step of 16 or 8)
 * and then passes it to the SAD routine.  Being a single file-scope buffer,
 * it is shared by every caller of those helpers.
 * NOTE(review): attribute_l1_data_b presumably places this in Blackfin L1
 * data memory — confirm against its definition. */
static uint8_t vtmp_blk[256] attribute_l1_data_b;
144

    
145
static int bfin_pix_abs16_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
146
{
147
    ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+1, 16, line_size, h);
148
    return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
149
}
150

    
151
static int bfin_pix_abs16_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
152
{
153
    ff_bfin_put_pixels16uc (vtmp_blk, blk2, blk2+line_size, 16, line_size, h);
154
    return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
155
}
156

    
157
static int bfin_pix_abs16_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
158
{
159
    ff_bfin_z_put_pixels16_xy2 (vtmp_blk, blk2, 16, line_size, h);
160
    return ff_bfin_z_sad16x16 (blk1, vtmp_blk, line_size, 16, h);
161
}
162

    
163
/**
 * sad[1] / pix_abs[1][0] entry: returns ff_bfin_z_sad8x8() of blk1 and
 * blk2, with line_size used as the stride-like argument for both blocks.
 * The context pointer c is not used.
 */
static int bfin_pix_abs8(void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
{
    int result = ff_bfin_z_sad8x8(blk1, blk2, line_size, line_size, h);

    return result;
}
167

    
168
static int bfin_pix_abs8_x2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
169
{
170
    ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+1, 8, line_size, h);
171
    return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
172
}
173

    
174
static int bfin_pix_abs8_y2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
175
{
176
    ff_bfin_put_pixels8uc (vtmp_blk, blk2, blk2+line_size, 8, line_size, h);
177
    return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
178
}
179

    
180
static int bfin_pix_abs8_xy2 (void *c, uint8_t *blk1, uint8_t *blk2, int line_size, int h)
181
{
182
    ff_bfin_z_put_pixels8_xy2 (vtmp_blk, blk2, 8, line_size, h);
183
    return ff_bfin_z_sad8x8 (blk1, vtmp_blk, line_size, 8, h);
184
}
185

    
186

    
187
/*
188
  decoder optimization
189
  start on 2/11 100 frames of 352x240@25 compiled with no optimization -g debugging
190
  9.824s ~ 2.44x off
191
  6.360s ~ 1.58x off with -O2
192
  5.740s ~ 1.43x off with idcts
193

194
  2.64s    2/20 same sman.mp4 decode only
195

196
*/
197

    
198
/**
 * Install the Blackfin routines into a DSPContext.
 *
 * Most entries are assigned unconditionally.  The forward DCT is only
 * replaced when avctx->dct_algo is FF_DCT_AUTO, and the IDCT entries are
 * only replaced when avctx->idct_algo is FF_IDCT_VP3 or FF_IDCT_AUTO; any
 * other setting leaves those entries as they were on entry.
 *
 * @param c     context whose function pointers are filled in
 * @param avctx codec context; only dct_algo and idct_algo are read
 */
void dsputil_init_bfin(DSPContext *c, AVCodecContext *avctx)
{
    /* Pixel fetch / store primitives. */
    c->get_pixels         = ff_bfin_get_pixels;
    c->diff_pixels        = ff_bfin_diff_pixels;
    c->put_pixels_clamped = ff_bfin_put_pixels_clamped;
    c->add_pixels_clamped = ff_bfin_add_pixels_clamped;

    /* Block housekeeping and pixel statistics. */
    c->clear_blocks = bfin_clear_blocks;
    c->pix_sum      = ff_bfin_pix_sum;
    c->pix_norm1    = ff_bfin_pix_norm1;

    /* Comparison functions: index 0 takes the 16-wide routine, index 1 the
     * 8-wide one.  vsad[0] (bfin_vsad) and vsad[4] (bfin_vsad_intra16) are
     * deliberately left unset. */
    c->sad[0] = bfin_pix_abs16;
    c->sad[1] = bfin_pix_abs8;

    c->sse[0] = ff_bfin_sse16;
    c->sse[1] = ff_bfin_sse8;
    c->sse[2] = ff_bfin_sse4;

    /* pix_abs[size][variant]: size 0 = 16-wide, size 1 = 8-wide;
     * variant 0 = plain, 1 = x2, 2 = y2, 3 = xy2. */
    c->pix_abs[0][0] = bfin_pix_abs16;
    c->pix_abs[0][1] = bfin_pix_abs16_x2;
    c->pix_abs[0][2] = bfin_pix_abs16_y2;
    c->pix_abs[0][3] = bfin_pix_abs16_xy2;

    c->pix_abs[1][0] = bfin_pix_abs8;
    c->pix_abs[1][1] = bfin_pix_abs8_x2;
    c->pix_abs[1][2] = bfin_pix_abs8_y2;
    c->pix_abs[1][3] = bfin_pix_abs8_xy2;

    /* Rounding put tables, same [size][variant] layout as pix_abs. */
    c->put_pixels_tab[0][0] = bfin_put_pixels16;
    c->put_pixels_tab[0][1] = bfin_put_pixels16_x2;
    c->put_pixels_tab[0][2] = bfin_put_pixels16_y2;
    c->put_pixels_tab[0][3] = bfin_put_pixels16_xy2;

    c->put_pixels_tab[1][0] = bfin_put_pixels8;
    c->put_pixels_tab[1][1] = bfin_put_pixels8_x2;
    c->put_pixels_tab[1][2] = bfin_put_pixels8_y2;
    c->put_pixels_tab[1][3] = bfin_put_pixels8_xy2;

    /* No-rounding put tables.  The xy2 slots ([0][3] and [1][3]) are
     * deliberately left unset (ff_bfin_put_pixels16_xy2_nornd /
     * ff_bfin_put_pixels8_xy2_nornd are not installed). */
    c->put_no_rnd_pixels_tab[0][0] = bfin_put_pixels16_nornd;
    c->put_no_rnd_pixels_tab[0][1] = bfin_put_pixels16_x2_nornd;
    c->put_no_rnd_pixels_tab[0][2] = bfin_put_pixels16_y2_nornd;

    c->put_no_rnd_pixels_tab[1][0] = bfin_put_pixels8_nornd;
    c->put_no_rnd_pixels_tab[1][1] = bfin_put_pixels8_x2_nornd;
    c->put_no_rnd_pixels_tab[1][2] = bfin_put_pixels8_y2_nornd;

    /* Forward DCT: only override when the caller asked for automatic choice. */
    if (avctx->dct_algo == FF_DCT_AUTO) {
        c->fdct = ff_bfin_fdct;
    }

    /* Inverse DCT: pick the routine set matching the requested algorithm. */
    switch (avctx->idct_algo) {
    case FF_IDCT_VP3:
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        c->idct                  = ff_bfin_vp3_idct;
        c->idct_add              = ff_bfin_vp3_idct_add;
        c->idct_put              = ff_bfin_vp3_idct_put;
        break;
    case FF_IDCT_AUTO:
        c->idct_permutation_type = FF_NO_IDCT_PERM;
        c->idct                  = ff_bfin_idct;
        c->idct_add              = bfin_idct_add;
        c->idct_put              = bfin_idct_put;
        break;
    default:
        /* Any other algorithm: leave the IDCT entries untouched. */
        break;
    }
}
266

    
267

    
268