Statistics
| Branch: | Revision:

ffmpeg / libavcodec / ppc / dsputil_ppc.c @ b550bfaa

History | View | Annotate | Download (9.71 KB)

1
/*
2
 * Copyright (c) 2002 Brian Foley
3
 * Copyright (c) 2002 Dieter Shirley
4
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
#include "dsputil.h"
24

    
25
#include "dsputil_ppc.h"
26

    
27
#ifdef HAVE_ALTIVEC
#include "dsputil_altivec.h"

/* AltiVec primitives that dsputil_init_ppc() stores directly into the
   DSPContext; they are only declared here, not defined in this file. */
void fdct_altivec(int16_t *block);
void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
                  int x16, int y16, int rounder);
void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);

/* Initialisers invoked from dsputil_init_ppc(). */
void dsputil_h264_init_ppc(DSPContext *c, AVCodecContext *avctx);

void dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx);
void vc1dsp_init_altivec(DSPContext *c, AVCodecContext *avctx);
void snow_init_altivec(DSPContext *c, AVCodecContext *avctx);
void float_init_altivec(DSPContext *c, AVCodecContext *avctx);
void int_init_altivec(DSPContext *c, AVCodecContext *avctx);

#endif /* HAVE_ALTIVEC */
45

    
46
/* Global CPU feature flag word. Starts at 0; dsputil_init_ppc() ORs in
   MM_ALTIVEC once has_altivec() reports support. */
int mm_flags = 0;
47

    
48
/**
 * Report the CPU features usable on this machine.
 *
 * @return a bit mask: MM_ALTIVEC is set when the file is built with
 *         HAVE_ALTIVEC and has_altivec() returns non-zero; otherwise 0.
 */
int mm_support(void)
{
    int result = 0;
#ifdef HAVE_ALTIVEC
    if (has_altivec()) {
        result |= MM_ALTIVEC;
    }
#endif /* HAVE_ALTIVEC */
    return result;
}
58

    
59
#ifdef CONFIG_POWERPC_PERF
/* Collected PMC statistics, indexed as [pmc][function][statistic]. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
/* list below must match enum in dsputil_ppc.h */
/* Read-only names printed by powerpc_display_perf_report(); plain const char
   so the string literals initialise pointers of a matching type. */
static const char *const perfname[] = {
  "ff_fft_calc_altivec",
  "gmc1_altivec",
  "dct_unquantize_h263_altivec",
  "fdct_altivec",
  "idct_add_altivec",
  "idct_put_altivec",
  "put_pixels16_altivec",
  "avg_pixels16_altivec",
  "avg_pixels8_altivec",
  "put_pixels8_xy2_altivec",
  "put_no_rnd_pixels8_xy2_altivec",
  "put_pixels16_xy2_altivec",
  "put_no_rnd_pixels16_xy2_altivec",
  "hadamard8_diff8x8_altivec",
  "hadamard8_diff16_altivec",
  "avg_pixels8_xy2_altivec",
  "clear_blocks_dcbz32_ppc",
  "clear_blocks_dcbz128_ppc",
  "put_h264_chroma_mc8_altivec",
  "avg_h264_chroma_mc8_altivec",
  "put_h264_qpel16_h_lowpass_altivec",
  "avg_h264_qpel16_h_lowpass_altivec",
  "put_h264_qpel16_v_lowpass_altivec",
  "avg_h264_qpel16_v_lowpass_altivec",
  "put_h264_qpel16_hv_lowpass_altivec",
  "avg_h264_qpel16_hv_lowpass_altivec",
  ""
};
#include <stdio.h>
#endif /* CONFIG_POWERPC_PERF */
93

    
94
#ifdef CONFIG_POWERPC_PERF
/**
 * Log (via av_log at AV_LOG_INFO) the minimum, maximum and average stored in
 * perfdata for every function/PMC pair whose sample count is non-zero.
 */
void powerpc_display_perf_report(void)
{
  int i, j;
  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
  for (i = 0; i < powerpc_perf_total; i++) {
    for (j = 0; j < POWERPC_NUM_PMC_ENABLED; j++) {
      const unsigned long long samples = perfdata[j][i][powerpc_data_num];

      /* Nothing was recorded for this pair; this also avoids dividing by 0. */
      if (samples == 0)
        continue;

      /* perfdata holds unsigned long long; cast so every argument has exactly
         the type the PRIu64 conversions expect. */
      av_log(NULL, AV_LOG_INFO,
             " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
             perfname[i],
             j + 1,
             (uint64_t)perfdata[j][i][powerpc_data_min],
             (uint64_t)perfdata[j][i][powerpc_data_max],
             (double)perfdata[j][i][powerpc_data_sum] / (double)samples,
             (uint64_t)samples);
    }
  }
}
#endif /* CONFIG_POWERPC_PERF */
117

    
118
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
  clear_blocks_dcbz32_ppc will not work properly
  on PowerPC processors with a cache line size
  not equal to 32 bytes.
  Fortunately all processors used by Apple up to
  at least the 7450 (aka second generation G4)
  use a 32-byte cache line.
  This is due to the use of the 'dcbz' instruction.
  It simply clears a single cache line to zero,
  so you need to know the cache line size to use it!
  It's absurd, but it's fast...

  update 24/06/2003: Apple released the G5 yesterday,
  with a PPC970. Cache line size: 128 bytes. Oops.
  The semantics of dcbz were changed: it always clears
  32 bytes. So the function below will work, but will
  be slow. So I fixed check_dcbz_effect (now named
  check_dcbzl_effect) to use dcbzl, which is defined
  to clear a cache line (as dcbz did before).
  So we can still distinguish, and use dcbz (32 bytes)
  or dcbzl (one cache line) as required.

  see <http://developer.apple.com/technotes/tn/tn2087.html>
  and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
143
/**
 * Zero the 6*64 DCTELEM coefficients at blocks using dcbz on 32-byte lines.
 *
 * Only bit 4 of the address is examined: when it is set, the first 16 bytes
 * are cleared with memset so that every dcbz lands on a 32-byte boundary, and
 * the bytes left after the last full line are cleared with memset as well.
 * NOTE(review): this presumes blocks is at least 16-byte aligned -- confirm
 * against the callers.
 *
 * The head/tail stores are counted in bytes rather than written through an
 * unsigned long pointer, so they stay inside the buffer whatever
 * sizeof(long) is and do not alias DCTELEM storage through another type.
 *
 * @param blocks start of the coefficient buffer to clear
 */
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
    uint8_t *const base = (uint8_t *)blocks;
    const long total = (long)(sizeof(DCTELEM)*6*64);
    const int misal = ((unsigned long)blocks & 0x00000010) != 0;
    long i = 0; /* byte offset; long so the asm index operand is register-wide */
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
    if (misal) {
        /* Clear the 16 bytes that precede the first 32-byte boundary. */
        memset(base, 0, 16);
        i = 16;
    }
    /* One dcbz per full 32-byte line that lies entirely inside the buffer. */
    for ( ; i + 32 <= total; i += 32) {
#ifndef __MWERKS__
        asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
#else
        __dcbz(blocks, i);
#endif
    }
    /* Whatever remains (16 bytes in the misaligned case, otherwise nothing). */
    if (i < total) {
        memset(base + i, 0, (size_t)(total - i));
    }
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
}
176

    
177
/* same as above, for CPUs where dcbzl clears a whole 128-byte cache line,
   i.e. the PPC970 aka G5 */
179
#ifdef HAVE_DCBZL
180
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
181
{
182
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
183
    register int misal = ((unsigned long)blocks & 0x0000007f);
184
    register int i = 0;
185
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
186
#if 1
187
 if (misal) {
188
   // we could probably also optimize this case,
189
   // but there's not much point as the machines
190
   // aren't available yet (2003-06-26)
191
      memset(blocks, 0, sizeof(DCTELEM)*6*64);
192
    }
193
    else
194
      for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
195
        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
196
      }
197
#else
198
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
199
#endif
200
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
201
}
202
#else
203
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
204
{
205
  memset(blocks, 0, sizeof(DCTELEM)*6*64);
206
}
207
#endif
208

    
209
#ifdef HAVE_DCBZL
/**
 * Measure how many bytes a single dcbzl instruction sets to zero.
 *
 * A 1024-byte buffer is filled with 0xFF, one dcbzl is issued at its middle,
 * and the zero bytes are counted afterwards. dsputil_init_ppc() switches on
 * the result (32 or 128) to select a clear_blocks implementation.
 * This can only be built when the assembler knows about dcbzl.
 *
 * @return the number of zeroed bytes, or 0 if the buffer cannot be allocated
 */
long check_dcbzl_effect(void)
{
  char *fakedata = av_malloc(1024);
  char *fakedata_middle;
  long zero = 0;
  long i;
  long count = 0;

  if (!fakedata)
    return 0L;

  fakedata_middle = fakedata + 512;

  memset(fakedata, 0xFF, 1024);

  /* below the constraint "b" seems to mean "Address base register"
     in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so....
     The "memory" clobber tells the compiler the buffer contents changed, so
     the counting loop below really re-reads them instead of assuming 0xFF. */
  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

  for (i = 0; i < 1024; i++) {
    if (fakedata[i] == (char)0)
      count++;
  }

  av_free(fakedata);

  return count;
}
#else
/* Built without HAVE_DCBZL: nothing can be measured, always returns 0. */
long check_dcbzl_effect(void)
{
  return 0;
}
#endif /* HAVE_DCBZL */
252

    
253
static void prefetch_ppc(void *mem, int stride, int h)
254
{
255
    register const uint8_t *p = mem;
256
    do {
257
        asm volatile ("dcbt 0,%0" : : "r" (p));
258
        p+= stride;
259
    } while(--h);
260
}
261

    
262
/**
 * Install the PowerPC-specific routines into a DSPContext.
 *
 * Always sets c->prefetch, and picks a clear_blocks variant from the value
 * returned by check_dcbzl_effect() (32 or 128; any other value leaves
 * c->clear_blocks untouched). When built with HAVE_ALTIVEC it calls the H.264
 * initialiser if that decoder is enabled and, if has_altivec() succeeds, sets
 * MM_ALTIVEC in mm_flags, runs the AltiVec initialisers, installs gmc1, the
 * forward DCT (encoder builds) and the IDCT according to avctx, and resets
 * the performance counters when CONFIG_POWERPC_PERF is defined.
 *
 * @param c     context whose function pointers are filled in
 * @param avctx codec context supplying dct_algo, idct_algo and lowres
 */
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
    long zeroed_bytes;

    /* Common optimizations whether Altivec is available or not */
    c->prefetch = prefetch_ppc;

    zeroed_bytes = check_dcbzl_effect();
    if (zeroed_bytes == 32) {
        c->clear_blocks = clear_blocks_dcbz32_ppc;
    } else if (zeroed_bytes == 128) {
        c->clear_blocks = clear_blocks_dcbz128_ppc;
    }

#ifdef HAVE_ALTIVEC
    if (ENABLE_H264_DECODER) {
        dsputil_h264_init_ppc(c, avctx);
    }

    /* Everything below requires AltiVec at run time. */
    if (!has_altivec()) {
        return;
    }

    mm_flags |= MM_ALTIVEC;

    dsputil_init_altivec(c, avctx);
    if (ENABLE_SNOW_DECODER) {
        snow_init_altivec(c, avctx);
    }
    if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER) {
        vc1dsp_init_altivec(c, avctx);
    }
    float_init_altivec(c, avctx);
    int_init_altivec(c, avctx);
    c->gmc1 = gmc1_altivec;

#ifdef CONFIG_ENCODERS
    if (avctx->dct_algo == FF_DCT_AUTO ||
        avctx->dct_algo == FF_DCT_ALTIVEC) {
        c->fdct = fdct_altivec;
    }
#endif /* CONFIG_ENCODERS */

    /* The AltiVec IDCT is only installed for full-resolution decoding. */
    if (avctx->lowres == 0 &&
        (avctx->idct_algo == FF_IDCT_AUTO ||
         avctx->idct_algo == FF_IDCT_ALTIVEC)) {
        c->idct_put = idct_put_altivec;
        c->idct_add = idct_add_altivec;
        c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }

#ifdef CONFIG_POWERPC_PERF
    {
        int func, pmc;

        /* min starts at the largest value so the first sample replaces it. */
        for (func = 0; func < powerpc_perf_total; func++) {
            for (pmc = 0; pmc < POWERPC_NUM_PMC_ENABLED; pmc++) {
                perfdata[pmc][func][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
                perfdata[pmc][func][powerpc_data_max] = 0x0000000000000000ULL;
                perfdata[pmc][func][powerpc_data_sum] = 0x0000000000000000ULL;
                perfdata[pmc][func][powerpc_data_num] = 0x0000000000000000ULL;
            }
        }
    }
#endif /* CONFIG_POWERPC_PERF */
#endif /* HAVE_ALTIVEC */
}