Statistics
| Branch: | Revision:

ffmpeg / libavcodec / ppc / dsputil_ppc.c @ 12802ec0

History | View | Annotate | Download (6.23 KB)

1 05c4072b Michael Niedermayer
/*
2
 * Copyright (c) 2002 Brian Foley
3
 * Copyright (c) 2002 Dieter Shirley
4 c4a17148 Michael Niedermayer
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5 05c4072b Michael Niedermayer
 *
6 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9 05c4072b Michael Niedermayer
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
12 05c4072b Michael Niedermayer
 *
13 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
14 05c4072b Michael Niedermayer
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
20 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 05c4072b Michael Niedermayer
 */
22
23 c6c98d08 Stefano Sabatini
#include "libavutil/cpu.h"
24 245976da Diego Biurrun
#include "libavcodec/dsputil.h"
25 ab6c65f6 Brian Foley
#include "dsputil_altivec.h"
26
/* ***** WARNING ***** WARNING ***** WARNING ***** */
/*
clear_blocks_dcbz32_ppc will not work properly on PowerPC processors with a
cache line size not equal to 32 bytes.
Fortunately all processors used by Apple up to at least the 7450 (aka second
generation G4) use 32-byte cache lines.
This is due to the use of the 'dcbz' instruction. It simply clears a
single cache line to zero, so you need to know the cache line size to use it!
It's absurd, but it's fast...

Update 24/06/2003: Apple released the G5 yesterday, with a PPC970. Cache line
size: 128 bytes. Oops.
The semantics of dcbz were changed: it now always clears 32 bytes. So the
function below will work, but will be slow. So I fixed check_dcbzl_effect to
use dcbzl, which is defined to clear a cache line (as dcbz did before). So we
can still distinguish, and use dcbz (32 bytes) or dcbzl (one cache line) as
required.

see <http://developer.apple.com/technotes/tn/tn2087.html>
and <http://developer.apple.com/technotes/tn/tn2086.html>
*/
/**
 * Zero six 64-coefficient DCT blocks using the 32-byte 'dcbz' instruction.
 *
 * Only bit 4 of the address is inspected, so the buffer is handled as being
 * either 32-byte aligned or exactly 16 bytes past a 32-byte boundary
 * (presumably callers guarantee at least 16-byte alignment -- TODO confirm).
 * In the misaligned case the first and last 16 bytes are cleared with plain
 * stores and dcbz covers the 32-byte lines in between.
 *
 * The head/tail used to be cleared through four 'unsigned long' stores at
 * hard-coded indices (0..3 and 188..191), which is only correct when
 * sizeof(unsigned long) == 4; with an 8-byte long the tail stores land past
 * the end of the buffer. They are now expressed in bytes via memset.
 *
 * @param blocks buffer of 6*64 DCTELEM coefficients to clear
 */
static void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
{
    const unsigned long total_bytes = sizeof(DCTELEM) * 6 * 64;
    const int misal = ((unsigned long)blocks & 0x00000010) != 0;
    int i = 0;

    if (misal) {
        /* leading half cache line that dcbz must not touch */
        memset(blocks, 0, 16);
        i = 16;
    }
    /* every iteration zeroes one complete 32-byte line at blocks + i */
    for ( ; i < total_bytes - 31; i += 32) {
        __asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
    }
    if (misal) {
        /* trailing half cache line left over by the loop above */
        memset((char *)blocks + total_bytes - 16, 0, 16);
    }
}
73
/* Same as above, for when dcbzl clears a whole 128-byte cache line,
   i.e. on the PPC970 (aka G5). */
#if HAVE_DCBZL
77 ddb8c2c0 Måns Rullgård
/**
 * Zero six 64-coefficient DCT blocks, one 128-byte step per 'dcbzl'.
 *
 * If the buffer address is not a multiple of 128 the whole buffer is
 * cleared with memset instead; no attempt is made to use dcbzl on the
 * aligned interior in that case.
 *
 * @param blocks buffer of 6*64 DCTELEM coefficients to clear
 */
static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
{
    const unsigned long total_bytes = sizeof(DCTELEM) * 6 * 64;
    int offset;

    if ((unsigned long)blocks & 0x0000007f) {
        /* misaligned start: take the plain memset path */
        memset(blocks, 0, total_bytes);
        return;
    }

    for (offset = 0; offset < total_bytes; offset += 128)
        __asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (offset) : "memory");
}
96
#else
97 ddb8c2c0 Måns Rullgård
/**
 * Fallback built when HAVE_DCBZL is not set: zero six 64-coefficient
 * DCT blocks with a plain memset.
 *
 * @param blocks buffer of 6*64 DCTELEM coefficients to clear
 */
static void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
{
    const unsigned long total_bytes = 6 * 64 * sizeof(*blocks);

    memset(blocks, 0, total_bytes);
}
101
#endif
102
103 b250f9c6 Aurelien Jacobs
#if HAVE_DCBZL
/* check_dcbzl_effect reports how many bytes are set to 0 by dcbzl. */
/* Update 24/06/2003: dcbz was replaced by dcbzl to get
   the intended effect (Apple "fixed" dcbz).
   Unfortunately this cannot be used unless the assembler
   knows about dcbzl ... */
/**
 * Measure how many bytes a single 'dcbzl' sets to zero.
 *
 * A 1024-byte scratch buffer is filled with 0xFF, dcbzl is issued on the
 * address in its middle, and the zero bytes in the buffer are counted.
 *
 * @return number of zeroed bytes, or 0 if the scratch buffer could not be
 *         allocated
 */
static long check_dcbzl_effect(void)
{
    char *fakedata = av_malloc(1024);
    char *fakedata_middle;
    long zero = 0;
    long i;
    long count = 0;

    if (!fakedata)
        return 0L;

    fakedata_middle = fakedata + 512;

    memset(fakedata, 0xFF, 1024);

    /* The "b" constraint requests an address base register, which keeps
       r0 out of the base operand.
       The "memory" clobber is required: without it the compiler is free
       to assume the buffer still holds the 0xFF pattern written above
       and to fold the counting loop below. */
    __asm__ volatile("dcbzl %0, %1"
                     : : "b" (fakedata_middle), "r" (zero) : "memory");

    for (i = 0; i < 1024; i++) {
        if (fakedata[i] == (char)0)
            count++;
    }

    av_free(fakedata);

    return count;
}
138 a4adb608 Michael Niedermayer
#else
139 ddb8c2c0 Måns Rullgård
/**
 * Fallback built when HAVE_DCBZL is not set: no probe is performed.
 *
 * @return always 0
 */
static long check_dcbzl_effect(void)
{
    return 0L;
}
143
#endif
144 35e5fb06 Romain Dolbeau
145 a5db5bda Luca Barbato
/**
 * Issue a 'dcbt' (data cache block touch) for the start of each of h rows.
 *
 * @param mem    address of the first row
 * @param stride byte distance between consecutive rows
 * @param h      number of rows; nothing is done when h <= 0 (the previous
 *               do/while form ran away for such values)
 */
static void prefetch_ppc(void *mem, int stride, int h)
{
    const uint8_t *p = mem;

    for ( ; h > 0; h--) {
        __asm__ volatile ("dcbt 0,%0" : : "r" (p));
        p += stride;
    }
}
153
154 b0368839 Michael Niedermayer
/**
 * Install the PowerPC implementations into a DSPContext.
 *
 * The prefetch hook is always set. clear_blocks is replaced only when
 * check_dcbzl_effect() reports 32 or 128 zeroed bytes; any other value
 * leaves the existing pointer untouched. When built with HAVE_ALTIVEC, the
 * H.264 initialiser is invoked (if the decoder is enabled) and, if the CPU
 * reports AltiVec at run time, the AltiVec routines are installed.
 *
 * @param c     DSP context to fill in
 * @param avctx codec context supplying dct_algo, idct_algo and lowres
 */
void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
{
    const long zeroed_bytes = check_dcbzl_effect();

    /* Optimizations that apply with or without AltiVec */
    c->prefetch = prefetch_ppc;

    if (zeroed_bytes == 32)
        c->clear_blocks = clear_blocks_dcbz32_ppc;
    else if (zeroed_bytes == 128)
        c->clear_blocks = clear_blocks_dcbz128_ppc;

#if HAVE_ALTIVEC
    if (CONFIG_H264_DECODER)
        dsputil_h264_init_ppc(c, avctx);

    /* everything below needs AltiVec at run time */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    dsputil_init_altivec(c, avctx);
    float_init_altivec(c, avctx);
    int_init_altivec(c, avctx);
    c->gmc1 = gmc1_altivec;

#if CONFIG_ENCODERS
    if (avctx->dct_algo == FF_DCT_AUTO ||
        avctx->dct_algo == FF_DCT_ALTIVEC)
        c->fdct = fdct_altivec;
#endif /* CONFIG_ENCODERS */

    /* the IDCT routines are installed only when lowres is 0 */
    if (avctx->lowres != 0)
        return;

    if (avctx->idct_algo == FF_IDCT_AUTO ||
        avctx->idct_algo == FF_IDCT_ALTIVEC) {
        c->idct_put = idct_put_altivec;
        c->idct_add = idct_add_altivec;
        c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    } else if ((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER) &&
               avctx->idct_algo == FF_IDCT_VP3) {
        c->idct_put = ff_vp3_idct_put_altivec;
        c->idct_add = ff_vp3_idct_add_altivec;
        c->idct     = ff_vp3_idct_altivec;
        c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
    }
#endif /* HAVE_ALTIVEC */
}