Statistics
| Branch: | Revision:

ffmpeg / libavcodec / ppc / dsputil_ppc.c @ 35f97369

History | View | Annotate | Download (9.62 KB)

1
/*
2
 * Copyright (c) 2002 Brian Foley
3
 * Copyright (c) 2002 Dieter Shirley
4
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
#include "../dsputil.h"
24

    
25
#include "dsputil_ppc.h"
26

    
27
#ifdef HAVE_ALTIVEC
28
#include "dsputil_altivec.h"
29

    
30
extern void fdct_altivec(int16_t *block);
31
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
32
                         int x16, int y16, int rounder);
33
extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
34
extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
35

    
36
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
37

    
38
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
39
void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
40
void snow_init_altivec(DSPContext* c, AVCodecContext *avctx);
41
void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
42

    
43
#endif
44

    
45
int mm_flags = 0;
46

    
47
/**
 * Report the multimedia extensions usable by this build.
 *
 * @return MM_ALTIVEC when the file is compiled with HAVE_ALTIVEC and
 *         has_altivec() returns non-zero; 0 otherwise.
 */
int mm_support(void)
{
#ifdef HAVE_ALTIVEC
    /* AltiVec is the only capability tested here */
    if (has_altivec())
        return MM_ALTIVEC;
#endif /* HAVE_ALTIVEC */
    return 0;
}
57

    
58
#ifdef CONFIG_POWERPC_PERF
/* Accumulated counter statistics, indexed [PMC][function][statistic]. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
/* list below must match enum in dsputil_ppc.h */
/* Read-only table of plain C strings: the entries are string literals and
   are only ever printed with "%s", so both the characters and the pointers
   are const (the former 'unsigned char *' element type did not match the
   literals it was initialised from). */
static const char* const perfname[] = {
  "ff_fft_calc_altivec",
  "gmc1_altivec",
  "dct_unquantize_h263_altivec",
  "fdct_altivec",
  "idct_add_altivec",
  "idct_put_altivec",
  "put_pixels16_altivec",
  "avg_pixels16_altivec",
  "avg_pixels8_altivec",
  "put_pixels8_xy2_altivec",
  "put_no_rnd_pixels8_xy2_altivec",
  "put_pixels16_xy2_altivec",
  "put_no_rnd_pixels16_xy2_altivec",
  "hadamard8_diff8x8_altivec",
  "hadamard8_diff16_altivec",
  "avg_pixels8_xy2_altivec",
  "clear_blocks_dcbz32_ppc",
  "clear_blocks_dcbz128_ppc",
  "put_h264_chroma_mc8_altivec",
  "avg_h264_chroma_mc8_altivec",
  "put_h264_qpel16_h_lowpass_altivec",
  "avg_h264_qpel16_h_lowpass_altivec",
  "put_h264_qpel16_v_lowpass_altivec",
  "avg_h264_qpel16_v_lowpass_altivec",
  "put_h264_qpel16_hv_lowpass_altivec",
  "avg_h264_qpel16_hv_lowpass_altivec",
  ""
};
#include <stdio.h>
#endif
92

    
93
#ifdef CONFIG_POWERPC_PERF
/**
 * Log the collected performance-counter statistics.
 *
 * Emits, through av_log() at AV_LOG_INFO level, the minimum, maximum and
 * average value stored in perfdata[] for every function/PMC pair whose
 * sample count (powerpc_data_num) is non-zero.
 */
void powerpc_display_perf_report(void)
{
  int i, j;
  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
  for (i = 0; i < powerpc_perf_total; i++) {
    for (j = 0; j < POWERPC_NUM_PMC_ENABLED; j++) {
      const unsigned long long *stat = perfdata[j][i];

      /* nothing recorded for this pair; skipping also avoids a 0/0 average */
      if (stat[powerpc_data_num] == 0)
        continue;

      /* perfdata holds unsigned long long, which need not be the type
         behind uint64_t; cast so each argument matches its PRIu64
         conversion exactly */
      av_log(NULL, AV_LOG_INFO,
             " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
             perfname[i],
             j+1,
             (uint64_t)stat[powerpc_data_min],
             (uint64_t)stat[powerpc_data_max],
             (double)stat[powerpc_data_sum] /
             (double)stat[powerpc_data_num],
             (uint64_t)stat[powerpc_data_num]);
    }
  }
}
#endif /* CONFIG_POWERPC_PERF */
116

    
117
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
  clear_blocks_dcbz32_ppc will not work properly
  on PowerPC processors with a cache line size
  not equal to 32 bytes.
  Fortunately all processors used by Apple up to
  at least the 7450 (aka second generation G4)
  use 32-byte cache lines.
  This is due to the use of the 'dcbz' instruction.
  It simply clears a single cache line to zero,
  so you need to know the cache line size to use it!
  It's absurd, but it's fast...

  update 24/06/2003: Apple released the G5 yesterday,
  with a PPC970. Cache line size: 128 bytes. Oops.
  The semantics of dcbz were changed: it always clears
  32 bytes, so the function below will work, but will
  be slow. So I fixed check_dcbzl_effect to use dcbzl,
  which is defined to clear a cache line (as dcbz did before).
  So we can still distinguish, and use dcbz (32 bytes)
  or dcbzl (one cache line) as required.

  see <http://developer.apple.com/technotes/tn/tn2087.html>
  and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
142
/**
 * Zero sizeof(DCTELEM)*6*64 bytes starting at blocks, using dcbz on the
 * assumption that one dcbz clears a 32-byte line.
 *
 * The code only distinguishes two placements of blocks: on a 32-byte
 * boundary, or 16 bytes past one (bit 4 of the address set). In the second
 * case the first and the last 16 bytes of the buffer share their line with
 * memory outside the buffer, so they are cleared with ordinary stores and
 * dcbz is used only on the lines that lie fully inside the buffer.
 * NOTE(review): blocks is presumably always at least 16-byte aligned;
 * confirm against the callers, since dcbz would otherwise clear bytes
 * in front of the buffer.
 *
 * The head and tail are cleared with size-exact memset() calls. The
 * previous 'unsigned long' stores only covered 16 bytes where long is
 * 32 bits wide; with a 64-bit long the tail indices (188..191) land far
 * beyond the end of the buffer.
 *
 * @param blocks buffer to clear
 */
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
    /* non-zero when blocks starts 16 bytes into a 32-byte line */
    register int misal = ((unsigned long)blocks & 0x00000010);
    register int i = 0;
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
    if (misal) {
      /* head: the 16 bytes in front of the first 32-byte boundary */
      memset(blocks, 0, 16);
      i += 16;
    }
    /* one dcbz per 32-byte line fully contained in the buffer */
    for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
#ifndef __MWERKS__
      asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
#else
      __dcbz( blocks, i );
#endif
    }
    if (misal) {
      /* tail: the final 16 bytes, which the loop above stops short of */
      memset((char *)blocks + sizeof(DCTELEM)*6*64 - 16, 0, 16);
    }
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
}
175

    
176
/* same as above, when dcbzl clear a whole 128B cache line
177
   i.e. the PPC970 aka G5 */
178
#ifdef HAVE_DCBZL
179
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
180
{
181
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
182
    register int misal = ((unsigned long)blocks & 0x0000007f);
183
    register int i = 0;
184
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
185
#if 1
186
 if (misal) {
187
   // we could probably also optimize this case,
188
   // but there's not much point as the machines
189
   // aren't available yet (2003-06-26)
190
      memset(blocks, 0, sizeof(DCTELEM)*6*64);
191
    }
192
    else
193
      for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
194
        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
195
      }
196
#else
197
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
198
#endif
199
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
200
}
201
#else
202
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
203
{
204
  memset(blocks, 0, sizeof(DCTELEM)*6*64);
205
}
206
#endif
207

    
208
#ifdef HAVE_DCBZL
/* check dcbz report how many bytes are set to 0 by dcbz */
/* update 24/06/2003 : replace dcbz by dcbzl to get
   the intended effect (Apple "fixed" dcbz)
   unfortunately this cannot be used unless the assembler
   knows about dcbzl ... */
/**
 * Measure how many bytes a single dcbzl instruction clears.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on
 * the address 512 bytes into it, and the zero bytes in the whole buffer
 * are then counted.
 *
 * @return the number of bytes found cleared, or 0 if the scratch buffer
 *         could not be allocated.
 */
long check_dcbzl_effect(void)
{
  register char *fakedata = (char*)av_malloc(1024);
  register char *fakedata_middle;
  register long zero = 0;
  register long i = 0;
  long count = 0;

  if (!fakedata)
  {
    return 0L;
  }

  fakedata_middle = (fakedata + 512);

  memset(fakedata, 0xFF, 1024);

  /* below the constraint "b" seems to mean "Address base register"
     in gcc-3.3 / RS/6000 speaks. seems to avoid using r0, so.... */
  /* The "memory" clobber tells the compiler that the instruction writes to
     memory; without it the compiler may assume the buffer still holds the
     0xFF pattern written by memset() and fold the counting loop below. */
  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

  for (i = 0; i < 1024 ; i ++)
  {
    if (fakedata[i] == (char)0)
      count++;
  }

  av_free(fakedata);

  return count;
}
#else
/* dcbzl is not available to the assembler: report that nothing is cleared */
long check_dcbzl_effect(void)
{
  return 0;
}
#endif
251

    
252
static void prefetch_ppc(void *mem, int stride, int h)
253
{
254
    register const uint8_t *p = mem;
255
    do {
256
        asm volatile ("dcbt 0,%0" : : "r" (p));
257
        p+= stride;
258
    } while(--h);
259
}
260

    
261
/**
 * Install the PowerPC-specific routines into a DSPContext.
 *
 * Always sets c->prefetch, and sets c->clear_blocks when
 * check_dcbzl_effect() reports 32 or 128 cleared bytes. When built with
 * HAVE_ALTIVEC, the H.264 init is called if ENABLE_H264_DECODER, and, if
 * has_altivec() is true, MM_ALTIVEC is added to mm_flags and the AltiVec
 * init functions plus the gmc1/fdct/idct pointers are installed according
 * to the codec context settings.
 *
 * When built with CONFIG_POWERPC_PERF the perfdata[] statistics are reset
 * on every call. The reset is done regardless of AltiVec availability,
 * because clear_blocks_dcbz32_ppc(), which is installed without AltiVec,
 * also records performance data.
 *
 * @param c     DSP context to fill in
 * @param avctx codec context supplying dct_algo, idct_algo and lowres
 */
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
    // Common optimizations whether Altivec is available or not
    c->prefetch = prefetch_ppc;
    switch (check_dcbzl_effect()) {
        case 32:
            c->clear_blocks = clear_blocks_dcbz32_ppc;
            break;
        case 128:
            c->clear_blocks = clear_blocks_dcbz128_ppc;
            break;
        default:
            /* any other size: leave c->clear_blocks untouched */
            break;
    }

#ifdef HAVE_ALTIVEC
    if(ENABLE_H264_DECODER) dsputil_h264_init_ppc(c, avctx);

    if (has_altivec()) {
        mm_flags |= MM_ALTIVEC;

        dsputil_init_altivec(c, avctx);
        if(ENABLE_SNOW_DECODER) snow_init_altivec(c, avctx);
        if(ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
            vc1dsp_init_altivec(c, avctx);
        float_init_altivec(c, avctx);
        c->gmc1 = gmc1_altivec;

#ifdef CONFIG_ENCODERS
        if (avctx->dct_algo == FF_DCT_AUTO ||
            avctx->dct_algo == FF_DCT_ALTIVEC)
        {
            c->fdct = fdct_altivec;
        }
#endif //CONFIG_ENCODERS

        /* the AltiVec IDCT is only installed at full resolution */
        if (avctx->lowres==0)
        {
            if ((avctx->idct_algo == FF_IDCT_AUTO) ||
                (avctx->idct_algo == FF_IDCT_ALTIVEC))
            {
                c->idct_put = idct_put_altivec;
                c->idct_add = idct_add_altivec;
                c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
            }
        }
    }
#endif /* HAVE_ALTIVEC */

#ifdef CONFIG_POWERPC_PERF
    {
        int i, j;
        for (i = 0 ; i < powerpc_perf_total ; i++)
        {
            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
            {
                /* min starts at the largest value so the first sample
                   recorded replaces it */
                perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
                perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
                perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
                perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
            }
        }
    }
#endif /* CONFIG_POWERPC_PERF */
}