Statistics
| Branch: | Revision:

ffmpeg / libavcodec / ppc / dsputil_ppc.c @ a1d0b6a2

History | View | Annotate | Download (9.66 KB)

1
/*
2
 * Copyright (c) 2002 Brian Foley
3
 * Copyright (c) 2002 Dieter Shirley
4
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

    
23
#include "dsputil.h"
24

    
25
#include "dsputil_ppc.h"
26

    
27
#ifdef HAVE_ALTIVEC
28
#include "dsputil_altivec.h"
29

    
30
extern void fdct_altivec(int16_t *block);
31
extern void gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
32
                         int x16, int y16, int rounder);
33
extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
34
extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
35

    
36
void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
37

    
38
void dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx);
39
void vc1dsp_init_altivec(DSPContext* c, AVCodecContext *avctx);
40
void snow_init_altivec(DSPContext* c, AVCodecContext *avctx);
41
void float_init_altivec(DSPContext* c, AVCodecContext *avctx);
42
void int_init_altivec(DSPContext* c, AVCodecContext *avctx);
43

    
44
#endif
45

    
46
/* CPU capability flags; dsputil_init_ppc() ORs MM_ALTIVEC into this
   when has_altivec() reports that AltiVec is usable. */
int mm_flags = 0;

/**
 * Report the multimedia extensions available at run time.
 *
 * @return MM_ALTIVEC when built with HAVE_ALTIVEC and has_altivec()
 *         is non-zero, otherwise 0.
 */
int mm_support(void)
{
#ifdef HAVE_ALTIVEC
    if (has_altivec())
        return MM_ALTIVEC;
#endif /* HAVE_ALTIVEC */
    return 0;
}
58

    
59
#ifdef CONFIG_POWERPC_PERF
60
/* Performance counters, indexed as [PMC][function][statistic]. */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];
/* list below must match enum in dsputil_ppc.h */
/* Plain (const) char pointers: the entries are string literals and are
   only ever printed with "%s". */
static const char *const perfname[] = {
  "ff_fft_calc_altivec",
  "gmc1_altivec",
  "dct_unquantize_h263_altivec",
  "fdct_altivec",
  "idct_add_altivec",
  "idct_put_altivec",
  "put_pixels16_altivec",
  "avg_pixels16_altivec",
  "avg_pixels8_altivec",
  "put_pixels8_xy2_altivec",
  "put_no_rnd_pixels8_xy2_altivec",
  "put_pixels16_xy2_altivec",
  "put_no_rnd_pixels16_xy2_altivec",
  "hadamard8_diff8x8_altivec",
  "hadamard8_diff16_altivec",
  "avg_pixels8_xy2_altivec",
  "clear_blocks_dcbz32_ppc",
  "clear_blocks_dcbz128_ppc",
  "put_h264_chroma_mc8_altivec",
  "avg_h264_chroma_mc8_altivec",
  "put_h264_qpel16_h_lowpass_altivec",
  "avg_h264_qpel16_h_lowpass_altivec",
  "put_h264_qpel16_v_lowpass_altivec",
  "avg_h264_qpel16_v_lowpass_altivec",
  "put_h264_qpel16_hv_lowpass_altivec",
  "avg_h264_qpel16_hv_lowpass_altivec",
  ""
};
91
#include <stdio.h>
92
#endif
93

    
94
#ifdef CONFIG_POWERPC_PERF
95
void powerpc_display_perf_report(void)
96
{
97
  int i, j;
98
  av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");
99
  for(i = 0 ; i < powerpc_perf_total ; i++)
100
  {
101
    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
102
      {
103
        if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
104
          av_log(NULL, AV_LOG_INFO,
105
                  " Function \"%s\" (pmc%d):\n\tmin: %"PRIu64"\n\tmax: %"PRIu64"\n\tavg: %1.2lf (%"PRIu64")\n",
106
                  perfname[i],
107
                  j+1,
108
                  perfdata[j][i][powerpc_data_min],
109
                  perfdata[j][i][powerpc_data_max],
110
                  (double)perfdata[j][i][powerpc_data_sum] /
111
                  (double)perfdata[j][i][powerpc_data_num],
112
                  perfdata[j][i][powerpc_data_num]);
113
      }
114
  }
115
}
116
#endif /* CONFIG_POWERPC_PERF */
117

    
118
/* ***** WARNING ***** WARNING ***** WARNING ***** */
119
/*
120
  clear_blocks_dcbz32_ppc will not work properly
121
  on PowerPC processors with a cache line size
122
  not equal to 32 bytes.
123
  Fortunately all processors used by Apple up to
124
  at least the 7450 (aka second generation G4)
125
  use 32 bytes cache line.
126
  This is due to the use of the 'dcbz' instruction.
127
  It simply clears a single cache line to zero,
128
  so you need to know the cache line size to use it !
129
  It's absurd, but it's fast...
130

131
  update 24/06/2003 : Apple released yesterday the G5,
132
  with a PPC970. Cache line size: 128 bytes. Oops.
133
  The semantics of dcbz were changed: it always clears
134
  32 bytes, so the function below will work, but will
135
  be slow. So I fixed check_dcbz_effect to use dcbzl,
136
  which is defined to clear a cache line (as dcbz before).
137
  So we still can distinguish, and use dcbz (32 bytes)
138
  or dcbzl (one cache line) as required.
139

140
  see <http://developer.apple.com/technotes/tn/tn2087.html>
141
  and <http://developer.apple.com/technotes/tn/tn2086.html>
142
*/
143
/**
 * Zero six 64-coefficient DCT blocks (sizeof(DCTELEM)*6*64 bytes) using
 * 'dcbz' on 32-byte cache lines.
 *
 * @param blocks start of the coefficient buffer.
 *               NOTE(review): the code only tests bit 4 of the address, so
 *               it appears to assume at least 16-byte alignment - confirm
 *               against callers.
 *
 * When the buffer starts in the middle of a 32-byte line, the first and
 * last 16 bytes are cleared with ordinary stores and dcbz handles the
 * fully covered lines in between.
 */
void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
    register int misal = ((unsigned long)blocks & 0x00000010);
    register int i = 0;
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
#if 1
    if (misal) {
        /* Leading half cache line. Sized in bytes rather than as four
           'unsigned long' stores, which only cover exactly 16 bytes when
           unsigned long is 4 bytes wide. */
        memset(blocks, 0, 16);
        i += 16;
    }
    for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
        asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
    }
    if (misal) {
        /* Trailing half cache line: the last 16 bytes of the buffer.
           The former ((unsigned long*)blocks)[188..191] stores would
           land beyond the buffer with an 8-byte unsigned long. */
        memset((uint8_t *)blocks + sizeof(DCTELEM)*6*64 - 16, 0, 16);
    }
#else
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
#endif
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
}
172

    
173
/* same as above, when dcbzl clears a whole 128B cache line
174
   i.e. the PPC970 aka G5 */
175
#ifdef HAVE_DCBZL
176
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
177
{
178
POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
179
    register int misal = ((unsigned long)blocks & 0x0000007f);
180
    register int i = 0;
181
POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
182
#if 1
183
 if (misal) {
184
   // we could probably also optimize this case,
185
   // but there's not much point as the machines
186
   // aren't available yet (2003-06-26)
187
      memset(blocks, 0, sizeof(DCTELEM)*6*64);
188
    }
189
    else
190
      for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
191
        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
192
      }
193
#else
194
    memset(blocks, 0, sizeof(DCTELEM)*6*64);
195
#endif
196
POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
197
}
198
#else
199
void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
200
{
201
  memset(blocks, 0, sizeof(DCTELEM)*6*64);
202
}
203
#endif
204

    
205
#ifdef HAVE_DCBZL
/**
 * Measure how many bytes a single 'dcbzl' sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, one dcbzl is issued on
 * the address in its middle, and the zero bytes are counted afterwards.
 *
 * update 24/06/2003: dcbzl is used instead of dcbz to get the intended
 * effect (Apple "fixed" dcbz); unfortunately this cannot be used unless
 * the assembler knows about dcbzl.
 *
 * @return number of zeroed bytes found in the buffer, or 0 if the
 *         scratch buffer could not be allocated.
 */
long check_dcbzl_effect(void)
{
  register char *fakedata = (char*)av_malloc(1024);
  register char *fakedata_middle;
  register long zero = 0;
  register long i = 0;
  long count = 0;

  if (!fakedata)
  {
    return 0L;
  }

  fakedata_middle = (fakedata + 512);

  memset(fakedata, 0xFF, 1024);

  /* The "b" constraint seems to mean "address base register" in
     gcc-3.3 / RS/6000 terms; it seems to avoid using r0.
     The "memory" clobber tells the compiler that the instruction writes
     to memory, so the buffer contents are re-read by the counting loop
     instead of being assumed to still hold the 0xFF fill. */
  asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero) : "memory");

  for (i = 0; i < 1024 ; i ++)
  {
    if (fakedata[i] == (char)0)
      count++;
  }

  av_free(fakedata);

  return count;
}
#else
/* dcbzl is not available in this build: report that nothing is zeroed. */
long check_dcbzl_effect(void)
{
  return 0;
}
#endif
248

    
249
static void prefetch_ppc(void *mem, int stride, int h)
250
{
251
    register const uint8_t *p = mem;
252
    do {
253
        asm volatile ("dcbt 0,%0" : : "r" (p));
254
        p+= stride;
255
    } while(--h);
256
}
257

    
258
/**
 * Install the PowerPC-specific routines into a DSPContext.
 *
 * @param c     context whose function pointers are filled in
 * @param avctx codec context; its dct_algo, idct_algo and lowres fields
 *              decide whether the AltiVec (I)DCT routines are installed
 */
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
    long zeroed_bytes;

    /* Optimizations that apply whether or not AltiVec is available. */
    c->prefetch = prefetch_ppc;

    /* Choose the clear_blocks variant matching what one dcbzl zeroes;
       any other result leaves c->clear_blocks untouched. */
    zeroed_bytes = check_dcbzl_effect();
    if (zeroed_bytes == 32)
        c->clear_blocks = clear_blocks_dcbz32_ppc;
    else if (zeroed_bytes == 128)
        c->clear_blocks = clear_blocks_dcbz128_ppc;

#ifdef HAVE_ALTIVEC
    if (ENABLE_H264_DECODER)
        dsputil_h264_init_ppc(c, avctx);

    if (has_altivec()) {
        mm_flags |= MM_ALTIVEC;

        dsputil_init_altivec(c, avctx);
        if (ENABLE_SNOW_DECODER)
            snow_init_altivec(c, avctx);
        if (ENABLE_VC1_DECODER || ENABLE_WMV3_DECODER)
            vc1dsp_init_altivec(c, avctx);
        float_init_altivec(c, avctx);
        int_init_altivec(c, avctx);
        c->gmc1 = gmc1_altivec;

#ifdef CONFIG_ENCODERS
        if (avctx->dct_algo == FF_DCT_AUTO ||
            avctx->dct_algo == FF_DCT_ALTIVEC)
            c->fdct = fdct_altivec;
#endif /* CONFIG_ENCODERS */

        /* The AltiVec IDCT is only installed for full-resolution decoding. */
        if (avctx->lowres == 0 &&
            (avctx->idct_algo == FF_IDCT_AUTO ||
             avctx->idct_algo == FF_IDCT_ALTIVEC)) {
            c->idct_put = idct_put_altivec;
            c->idct_add = idct_add_altivec;
            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
        }

#ifdef CONFIG_POWERPC_PERF
        {
            /* Reset every counter: min starts at the largest 64-bit
               value, max/sum/num start at zero. */
            int func, pmc;

            for (func = 0; func < powerpc_perf_total; func++) {
                for (pmc = 0; pmc < POWERPC_NUM_PMC_ENABLED; pmc++) {
                    perfdata[pmc][func][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
                    perfdata[pmc][func][powerpc_data_max] = 0ULL;
                    perfdata[pmc][func][powerpc_data_sum] = 0ULL;
                    perfdata[pmc][func][powerpc_data_num] = 0ULL;
                }
            }
        }
#endif /* CONFIG_POWERPC_PERF */
    }
#endif /* HAVE_ALTIVEC */
}