Revision be449fca

configure
448 448
    asm="$2"
449 449
    shift 2
450 450
    check_cc "$@" <<EOF && enable $name || disable $name
451
int foo(void){ asm volatile($asm); }
451
int foo(void){ __asm__ volatile($asm); }
452 452
EOF
453 453
}
454 454

  
......
1574 1574
    # base pointer is cleared in the inline assembly code.
1575 1575
    check_exec_crash <<EOF && enable ebp_available
1576 1576
    volatile int i=0;
1577
    asm volatile (
1577
    __asm__ volatile (
1578 1578
        "xorl %%ebp, %%ebp"
1579 1579
    ::: "%ebp");
1580 1580
    return i;
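For context, check_exec_crash compiles the fragment above into a small test program and runs it; enabling ebp_available therefore requires that clobbering %ebp neither breaks compilation nor crashes at run time. A standalone equivalent of the probe (an illustrative sketch, assuming the usual main() wrapper; not part of this changeset) would be:

    /* Illustrative sketch of the ebp_available probe: the compiler may refuse
     * the %ebp clobber outright (e.g. when a frame pointer is required), and
     * the resulting binary must also run without crashing. */
    int main(void)
    {
        volatile int i = 0;
        __asm__ volatile (
            "xorl %%ebp, %%ebp"
            ::: "%ebp");
        return i;
    }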
......
1934 1934
# Find out if the .align argument is a power of two or not.
1935 1935
if test $asmalign_pot = "unknown"; then
1936 1936
    disable asmalign_pot
1937
    echo 'asm (".align 3");' | check_cc && enable asmalign_pot
1937
    echo '__asm__ (".align 3");' | check_cc && enable asmalign_pot
1938 1938
fi
1939 1939

  
1940 1940
enabled_any $DECODER_LIST      && enable decoders
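The asmalign_pot probe above separates assemblers that read the .align argument as a power-of-two exponent (".align 3" then requests an 8-byte boundary and assembles cleanly) from those that read it as a byte count and reject a non-power-of-two value of 3. The result drives the ASMALIGN() macro used by the x86 inline-asm loops later in this diff; its definition is roughly the following (a reconstruction for illustration, assuming the usual config.h plumbing, not quoted from this changeset):

    /* Assumed shape of the macro selected by the asmalign_pot result. */
    #ifdef HAVE_ASMALIGN_POT
    #   define ASMALIGN(ZEROBITS) ".align "     #ZEROBITS "\n\t"
    #else
    #   define ASMALIGN(ZEROBITS) ".align 1<<"  #ZEROBITS "\n\t"
    #endif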
doc/optimization.txt
154 154
General Tips:
155 155
-------------
156 156
Use asm loops like:
157
asm(
157
__asm__(
158 158
    "1: ....
159 159
    ...
160 160
    "jump_instruciton ....
161 161
Do not use C loops:
162 162
do{
163
    asm(
163
    __asm__(
164 164
        ...
165 165
}while()
166 166

  
167
Use asm() instead of intrinsics. The latter requires a good optimizing compiler
167
Use __asm__() instead of intrinsics. The latter requires a good optimizing compiler
168 168
which gcc is not.
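Concretely, the loop guideline above wants the label, the body and the backward branch all inside one __asm__ block, rather than a C do/while that re-enters a tiny asm statement on every iteration. A minimal sketch of the preferred shape (hypothetical x86 code, not taken from the tree):

    /* Hypothetical example: the whole loop lives in a single __asm__ block
     * (x86, AT&T syntax), so the compiler sees one opaque region instead of
     * an asm statement wrapped in C loop control. */
    static void walk(const unsigned char *p, const unsigned char *end)
    {
        __asm__ volatile (
            "1:                    \n\t"
            "add     $4, %0        \n\t"   /* loop body              */
            "cmp     %1, %0        \n\t"
            "jb      1b            \n\t"   /* loop back while p < end */
            : "+r"(p)
            : "r"(end)
            : "cc");
    }

The discouraged do{ __asm__(...); }while() form keeps the loop control in C, so every iteration pays the entry/exit cost of a separate asm statement and nothing can be scheduled across iterations.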
169 169

  
170 170

  
libavcodec/alpha/asm.h
105 105
#define implver         __builtin_alpha_implver
106 106
#define rpcc            __builtin_alpha_rpcc
107 107
#else
108
#define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
109
#define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
110
#define prefetch_m(p)   asm volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
111
#define prefetch_men(p) asm volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
112
#define cmpbge(a, b) ({ uint64_t __r; asm ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
113
#define extql(a, b)  ({ uint64_t __r; asm ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
114
#define extwl(a, b)  ({ uint64_t __r; asm ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
115
#define extqh(a, b)  ({ uint64_t __r; asm ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
116
#define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
117
#define zapnot(a, b) ({ uint64_t __r; asm ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
118
#define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; })
119
#define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));                       __r; })
120
#define rpcc()       ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));                       __r; })
108
#define prefetch(p)     __asm__ volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
109
#define prefetch_en(p)  __asm__ volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
110
#define prefetch_m(p)   __asm__ volatile("lds $f31,%0" : : "m"(*(const char *) (p)) : "memory")
111
#define prefetch_men(p) __asm__ volatile("ldt $f31,%0" : : "m"(*(const char *) (p)) : "memory")
112
#define cmpbge(a, b) ({ uint64_t __r; __asm__ ("cmpbge  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
113
#define extql(a, b)  ({ uint64_t __r; __asm__ ("extql   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
114
#define extwl(a, b)  ({ uint64_t __r; __asm__ ("extwl   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
115
#define extqh(a, b)  ({ uint64_t __r; __asm__ ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
116
#define zap(a, b)    ({ uint64_t __r; __asm__ ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
117
#define zapnot(a, b) ({ uint64_t __r; __asm__ ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
118
#define amask(a)     ({ uint64_t __r; __asm__ ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; })
119
#define implver()    ({ uint64_t __r; __asm__ ("implver %0"         : "=r" (__r));                       __r; })
120
#define rpcc()       ({ uint64_t __r; __asm__ volatile ("rpcc %0"   : "=r" (__r));                       __r; })
121 121
#endif
122
#define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
122
#define wh64(p) __asm__ volatile("wh64 (%0)" : : "r"(p) : "memory")
123 123

  
124 124
#if GNUC_PREREQ(3,3) && defined(__alpha_max__)
125 125
#define minub8  __builtin_alpha_minub8
......
136 136
#define unpkbl  __builtin_alpha_unpkbl
137 137
#define unpkbw  __builtin_alpha_unpkbw
138 138
#else
139
#define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
140
#define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
141
#define minuw4(a, b) ({ uint64_t __r; asm (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
142
#define minsw4(a, b) ({ uint64_t __r; asm (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
143
#define maxub8(a, b) ({ uint64_t __r; asm (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
144
#define maxsb8(a, b) ({ uint64_t __r; asm (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
145
#define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
146
#define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
147
#define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
148
#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
149
#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
150
#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
151
#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
139
#define minub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
140
#define minsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
141
#define minuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
142
#define minsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; minsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
143
#define maxub8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
144
#define maxsb8(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
145
#define maxuw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
146
#define maxsw4(a, b) ({ uint64_t __r; __asm__ (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
147
#define perr(a, b)   ({ uint64_t __r; __asm__ (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
148
#define pklb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
149
#define pkwb(a)      ({ uint64_t __r; __asm__ (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
150
#define unpkbl(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
151
#define unpkbw(a)    ({ uint64_t __r; __asm__ (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
152 152
#endif
153 153

  
154 154
#elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */
......
158 158
#define ldl(p) (*(const int32_t *)  (p))
159 159
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
160 160
#define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0)
161
#define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
161
#define ldq_u(a)     __asm__ ("ldq_u   %v0,0(%a0)", a)
162 162
#define uldq(a)      (*(const __unaligned uint64_t *) (a))
163
#define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
164
#define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
165
#define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
166
#define extqh(a, b)  asm ("extqh   %a0,%a1,%v0", a, b)
167
#define zap(a, b)    asm ("zap     %a0,%a1,%v0", a, b)
168
#define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
169
#define amask(a)     asm ("amask   %a0,%v0", a)
170
#define implver()    asm ("implver %v0")
171
#define rpcc()       asm ("rpcc           %v0")
172
#define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)
173
#define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
174
#define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
175
#define minsw4(a, b) asm ("minsw4  %a0,%a1,%v0", a, b)
176
#define maxub8(a, b) asm ("maxub8  %a0,%a1,%v0", a, b)
177
#define maxsb8(a, b) asm ("maxsb8  %a0,%a1,%v0", a, b)
178
#define maxuw4(a, b) asm ("maxuw4  %a0,%a1,%v0", a, b)
179
#define maxsw4(a, b) asm ("maxsw4  %a0,%a1,%v0", a, b)
180
#define perr(a, b)   asm ("perr    %a0,%a1,%v0", a, b)
181
#define pklb(a)      asm ("pklb    %a0,%v0", a)
182
#define pkwb(a)      asm ("pkwb    %a0,%v0", a)
183
#define unpkbl(a)    asm ("unpkbl  %a0,%v0", a)
184
#define unpkbw(a)    asm ("unpkbw  %a0,%v0", a)
185
#define wh64(a)      asm ("wh64    %a0", a)
163
#define cmpbge(a, b) __asm__ ("cmpbge  %a0,%a1,%v0", a, b)
164
#define extql(a, b)  __asm__ ("extql   %a0,%a1,%v0", a, b)
165
#define extwl(a, b)  __asm__ ("extwl   %a0,%a1,%v0", a, b)
166
#define extqh(a, b)  __asm__ ("extqh   %a0,%a1,%v0", a, b)
167
#define zap(a, b)    __asm__ ("zap     %a0,%a1,%v0", a, b)
168
#define zapnot(a, b) __asm__ ("zapnot  %a0,%a1,%v0", a, b)
169
#define amask(a)     __asm__ ("amask   %a0,%v0", a)
170
#define implver()    __asm__ ("implver %v0")
171
#define rpcc()       __asm__ ("rpcc           %v0")
172
#define minub8(a, b) __asm__ ("minub8  %a0,%a1,%v0", a, b)
173
#define minsb8(a, b) __asm__ ("minsb8  %a0,%a1,%v0", a, b)
174
#define minuw4(a, b) __asm__ ("minuw4  %a0,%a1,%v0", a, b)
175
#define minsw4(a, b) __asm__ ("minsw4  %a0,%a1,%v0", a, b)
176
#define maxub8(a, b) __asm__ ("maxub8  %a0,%a1,%v0", a, b)
177
#define maxsb8(a, b) __asm__ ("maxsb8  %a0,%a1,%v0", a, b)
178
#define maxuw4(a, b) __asm__ ("maxuw4  %a0,%a1,%v0", a, b)
179
#define maxsw4(a, b) __asm__ ("maxsw4  %a0,%a1,%v0", a, b)
180
#define perr(a, b)   __asm__ ("perr    %a0,%a1,%v0", a, b)
181
#define pklb(a)      __asm__ ("pklb    %a0,%v0", a)
182
#define pkwb(a)      __asm__ ("pkwb    %a0,%v0", a)
183
#define unpkbl(a)    __asm__ ("unpkbl  %a0,%v0", a)
184
#define unpkbw(a)    __asm__ ("unpkbw  %a0,%v0", a)
185
#define wh64(a)      __asm__ ("wh64    %a0", a)
186 186

  
187 187
#else
188 188
#error "Unknown compiler!"
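These statement-expression wrappers are meant to be called like ordinary functions from the Alpha DSP code. A hypothetical caller (illustrative only, not part of this changeset; assumes the definitions above are in scope):

    #include <stdint.h>

    /* zapnot keeps only the bytes whose mask bit is set, so a mask of 0x0f
     * extracts the low 32 bits of a 64-bit word (hypothetical helper). */
    static uint64_t low_half(uint64_t x)
    {
        return zapnot(x, 0x0f);
    }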
libavcodec/armv4l/dsputil_arm.c
66 66

  
67 67
static void add_pixels_clamped_ARM(short *block, unsigned char *dest, int line_size)
68 68
{
69
    asm volatile (
69
    __asm__ volatile (
70 70
                  "mov r10, #8 \n\t"
71 71

  
72 72
                  "1: \n\t"
......
206 206
#ifdef HAVE_ARMV5TE
207 207
static void prefetch_arm(void *mem, int stride, int h)
208 208
{
209
    asm volatile(
209
    __asm__ volatile(
210 210
        "1:              \n\t"
211 211
        "subs %0, %0, #1 \n\t"
212 212
        "pld  [%1]       \n\t"
libavcodec/armv4l/dsputil_iwmmxt.c
22 22
#include "libavcodec/dsputil.h"
23 23

  
24 24
#define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
25
#define SET_RND(regd)  asm volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
25
#define SET_RND(regd)  __asm__ volatile ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
26 26
#define WAVG2B "wavg2b"
27 27
#include "dsputil_iwmmxt_rnd.h"
28 28
#undef DEF
......
30 30
#undef WAVG2B
31 31

  
32 32
#define DEF(x, y) x ## _ ## y ##_iwmmxt
33
#define SET_RND(regd)  asm volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
33
#define SET_RND(regd)  __asm__ volatile ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
34 34
#define WAVG2B "wavg2br"
35 35
#include "dsputil_iwmmxt_rnd.h"
36 36
#undef DEF
......
39 39

  
40 40
// need scheduling
41 41
#define OP(AVG)                                         \
42
    asm volatile (                                      \
42
    __asm__ volatile (                                      \
43 43
        /* alignment */                                 \
44 44
        "and r12, %[pixels], #7 \n\t"                   \
45 45
        "bic %[pixels], %[pixels], #7 \n\t"             \
......
89 89
{
90 90
    uint8_t *pixels2 = pixels + line_size;
91 91

  
92
    asm volatile (
92
    __asm__ volatile (
93 93
        "mov            r12, #4                 \n\t"
94 94
        "1:                                     \n\t"
95 95
        "pld            [%[pixels], %[line_size2]]              \n\t"
......
125 125

  
126 126
static void clear_blocks_iwmmxt(DCTELEM *blocks)
127 127
{
128
    asm volatile(
128
    __asm__ volatile(
129 129
                "wzero wr0                      \n\t"
130 130
                "mov r1, #(128 * 6 / 32)        \n\t"
131 131
                "1:                             \n\t"
libavcodec/armv4l/dsputil_iwmmxt_rnd.h
26 26
void DEF(put, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
27 27
{
28 28
    int stride = line_size;
29
    asm volatile (
29
    __asm__ volatile (
30 30
        "and r12, %[pixels], #7 \n\t"
31 31
        "bic %[pixels], %[pixels], #7 \n\t"
32 32
        "tmcr wcgr1, r12 \n\t"
......
60 60
void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
61 61
{
62 62
    int stride = line_size;
63
    asm volatile (
63
    __asm__ volatile (
64 64
        "and r12, %[pixels], #7 \n\t"
65 65
        "bic %[pixels], %[pixels], #7 \n\t"
66 66
        "tmcr wcgr1, r12 \n\t"
......
102 102
void DEF(put, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
103 103
{
104 104
    int stride = line_size;
105
    asm volatile (
105
    __asm__ volatile (
106 106
        "and r12, %[pixels], #7 \n\t"
107 107
        "bic %[pixels], %[pixels], #7 \n\t"
108 108
        "tmcr wcgr1, r12 \n\t"
......
142 142
void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
143 143
{
144 144
    int stride = line_size;
145
    asm volatile (
145
    __asm__ volatile (
146 146
        "pld [%[pixels]]                \n\t"
147 147
        "pld [%[pixels], #32]           \n\t"
148 148
        "pld [%[block]]                 \n\t"
......
201 201
    // [wr0 wr1 wr2 wr3] for previous line
202 202
    // [wr4 wr5 wr6 wr7] for current line
203 203
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
204
    asm volatile(
204
    __asm__ volatile(
205 205
        "pld [%[pixels]]                \n\t"
206 206
        "pld [%[pixels], #32]           \n\t"
207 207
        "and r12, %[pixels], #7         \n\t"
......
250 250
    // [wr0 wr1 wr2 wr3] for previous line
251 251
    // [wr4 wr5 wr6 wr7] for current line
252 252
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
253
    asm volatile(
253
    __asm__ volatile(
254 254
        "pld [%[pixels]]                \n\t"
255 255
        "pld [%[pixels], #32]           \n\t"
256 256
        "and r12, %[pixels], #7         \n\t"
......
311 311
    // [wr0 wr1 wr2 wr3] for previous line
312 312
    // [wr4 wr5 wr6 wr7] for current line
313 313
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
314
    asm volatile(
314
    __asm__ volatile(
315 315
        "pld [%[pixels]]                \n\t"
316 316
        "pld [%[pixels], #32]           \n\t"
317 317
        "pld [%[block]]                 \n\t"
......
372 372
    // [wr0 wr1 wr2 wr3] for previous line
373 373
    // [wr4 wr5 wr6 wr7] for current line
374 374
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
375
    asm volatile(
375
    __asm__ volatile(
376 376
        "pld [%[pixels]]                \n\t"
377 377
        "pld [%[pixels], #32]           \n\t"
378 378
        "pld [%[block]]                 \n\t"
......
448 448
    int stride = line_size;
449 449
    // [wr0 wr1 wr2 wr3] for previous line
450 450
    // [wr4 wr5 wr6 wr7] for current line
451
    asm volatile(
451
    __asm__ volatile(
452 452
        "pld            [%[pixels]]                             \n\t"
453 453
        "pld            [%[pixels], #32]                        \n\t"
454 454
        "and            r12, %[pixels], #7                      \n\t"
......
502 502
    int stride = line_size;
503 503
    // [wr0 wr1 wr2 wr3] for previous line
504 504
    // [wr4 wr5 wr6 wr7] for current line
505
    asm volatile(
505
    __asm__ volatile(
506 506
        "pld [%[pixels]]                \n\t"
507 507
        "pld [%[pixels], #32]           \n\t"
508 508
        "and r12, %[pixels], #7         \n\t"
......
559 559
    int stride = line_size;
560 560
    // [wr0 wr1 wr2 wr3] for previous line
561 561
    // [wr4 wr5 wr6 wr7] for current line
562
    asm volatile(
562
    __asm__ volatile(
563 563
        "pld [%[pixels]]                \n\t"
564 564
        "pld [%[pixels], #32]           \n\t"
565 565
        "and r12, %[pixels], #7         \n\t"
......
627 627
    // [wr0 wr1 wr2 wr3] for previous line
628 628
    // [wr4 wr5 wr6 wr7] for current line
629 629
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
630
    asm volatile(
630
    __asm__ volatile(
631 631
        "pld [%[pixels]]                \n\t"
632 632
        "mov r12, #2                    \n\t"
633 633
        "pld [%[pixels], #32]           \n\t"
......
721 721
    // [wr0 wr1 wr2 wr3] for previous line
722 722
    // [wr4 wr5 wr6 wr7] for current line
723 723
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
724
    asm volatile(
724
    __asm__ volatile(
725 725
        "pld [%[pixels]]                \n\t"
726 726
        "mov r12, #2                    \n\t"
727 727
        "pld [%[pixels], #32]           \n\t"
......
863 863
    // [wr0 wr1 wr2 wr3] for previous line
864 864
    // [wr4 wr5 wr6 wr7] for current line
865 865
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
866
    asm volatile(
866
    __asm__ volatile(
867 867
        "pld [%[block]]                 \n\t"
868 868
        "pld [%[block], #32]            \n\t"
869 869
        "pld [%[pixels]]                \n\t"
......
967 967
    // [wr0 wr1 wr2 wr3] for previous line
968 968
    // [wr4 wr5 wr6 wr7] for current line
969 969
    SET_RND(wr15); // =2 for rnd  and  =1 for no_rnd version
970
    asm volatile(
970
    __asm__ volatile(
971 971
        "pld [%[block]]                 \n\t"
972 972
        "pld [%[block], #32]            \n\t"
973 973
        "pld [%[pixels]]                \n\t"
libavcodec/armv4l/float_arm_vfp.c
42 42
static void vector_fmul_vfp(float *dst, const float *src, int len)
43 43
{
44 44
    int tmp;
45
    asm volatile(
45
    __asm__ volatile(
46 46
        "fmrx       %[tmp], fpscr\n\t"
47 47
        "orr        %[tmp], %[tmp], #(3 << 16)\n\t" /* set vector size to 4 */
48 48
        "fmxr       fpscr, %[tmp]\n\t"
......
90 90
static void vector_fmul_reverse_vfp(float *dst, const float *src0, const float *src1, int len)
91 91
{
92 92
    src1 += len;
93
    asm volatile(
93
    __asm__ volatile(
94 94
        "fldmdbs    %[src1]!, {s0-s3}\n\t"
95 95
        "fldmias    %[src0]!, {s8-s11}\n\t"
96 96
        "fldmdbs    %[src1]!, {s4-s7}\n\t"
......
149 149
 */
150 150
void float_to_int16_vfp(int16_t *dst, const float *src, int len)
151 151
{
152
    asm volatile(
152
    __asm__ volatile(
153 153
        "fldmias    %[src]!, {s16-s23}\n\t"
154 154
        "ftosis     s0, s16\n\t"
155 155
        "ftosis     s1, s17\n\t"
libavcodec/armv4l/mathops.h
25 25
#ifdef FRAC_BITS
26 26
#   define MULL(a, b) \
27 27
        ({  int lo, hi;\
28
         asm("smull %0, %1, %2, %3     \n\t"\
28
         __asm__("smull %0, %1, %2, %3     \n\t"\
29 29
             "mov   %0, %0,     lsr %4\n\t"\
30 30
             "add   %1, %0, %1, lsl %5\n\t"\
31 31
             : "=&r"(lo), "=&r"(hi)\
......
37 37
static inline av_const int MULH(int a, int b)
38 38
{
39 39
    int r;
40
    asm ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
40
    __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
41 41
    return r;
42 42
}
43 43
#define MULH MULH
44 44
#else
45 45
#define MULH(a, b) \
46 46
    ({ int lo, hi;\
47
     asm ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
47
     __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));\
48 48
     hi; })
49 49
#endif
50 50

  
51 51
static inline av_const int64_t MUL64(int a, int b)
52 52
{
53 53
    union { uint64_t x; unsigned hl[2]; } x;
54
    asm ("smull %0, %1, %2, %3"
54
    __asm__ ("smull %0, %1, %2, %3"
55 55
         : "=r"(x.hl[0]), "=r"(x.hl[1]) : "r"(a), "r"(b));
56 56
    return x.x;
57 57
}
......
60 60
static inline av_const int64_t MAC64(int64_t d, int a, int b)
61 61
{
62 62
    union { uint64_t x; unsigned hl[2]; } x = { d };
63
    asm ("smlal %0, %1, %2, %3"
63
    __asm__ ("smlal %0, %1, %2, %3"
64 64
         : "+r"(x.hl[0]), "+r"(x.hl[1]) : "r"(a), "r"(b));
65 65
    return x.x;
66 66
}
......
71 71

  
72 72
/* signed 16x16 -> 32 multiply add accumulate */
73 73
#   define MAC16(rt, ra, rb) \
74
        asm ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
74
        __asm__ ("smlabb %0, %2, %3, %0" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
75 75
/* signed 16x16 -> 32 multiply */
76 76
#   define MUL16(ra, rb)                                                \
77 77
        ({ int __rt;                                                    \
78
         asm ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));  \
78
         __asm__ ("smulbb %0, %1, %2" : "=r" (__rt) : "r" (ra), "r" (rb));  \
79 79
         __rt; })
80 80

  
81 81
#endif
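For reference, MULH above yields the high 32 bits of a 32x32-bit signed multiply: smull writes the full 64-bit product into a register pair, and smmul returns only its upper half. An illustrative portable equivalent (not part of this changeset):

    #include <stdint.h>

    /* Portable sketch of what the smull/smmul-based MULH computes:
     * the upper 32 bits of the signed 64-bit product. */
    static inline int mulh_ref(int a, int b)
    {
        return (int)(((int64_t)a * b) >> 32);
    }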
libavcodec/armv4l/mpegvideo_armv5te.c
65 65
({ DCTELEM *xblock = xxblock; \
66 66
   int xqmul = xxqmul, xqadd = xxqadd, xcount = xxcount, xtmp; \
67 67
   int xdata1, xdata2; \
68
asm volatile( \
68
__asm__ volatile( \
69 69
        "subs %[count], %[count], #2       \n\t" \
70 70
        "ble 2f                            \n\t" \
71 71
        "ldrd r4, [%[block], #0]           \n\t" \
libavcodec/armv4l/mpegvideo_iwmmxt.c
48 48
    else
49 49
        nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
50 50

  
51
    asm volatile (
51
    __asm__ volatile (
52 52
/*      "movd %1, %%mm6                 \n\t" //qmul */
53 53
/*      "packssdw %%mm6, %%mm6          \n\t" */
54 54
/*      "packssdw %%mm6, %%mm6          \n\t" */
libavcodec/bfin/dsputil_bfin.c
77 77
{
78 78
    // This is just a simple memset.
79 79
    //
80
    asm("P0=192; "
80
    __asm__("P0=192; "
81 81
        "I0=%0;  "
82 82
        "R0=0;   "
83 83
        "LSETUP(clear_blocks_blkfn_lab,clear_blocks_blkfn_lab)LC0=P0;"
libavcodec/bfin/mathops.h
24 24

  
25 25
#ifdef CONFIG_MPEGAUDIO_HP
26 26
#define MULH(X,Y) ({ int xxo;                           \
27
    asm (                                               \
27
    __asm__ (                                               \
28 28
        "a1 = %2.L * %1.L (FU);\n\t"                    \
29 29
        "a1 = a1 >> 16;\n\t"                            \
30 30
        "a1 += %2.H * %1.L (IS,M);\n\t"                 \
......
34 34
        : "=d" (xxo) : "d" (X), "d" (Y) : "A0","A1"); xxo; })
35 35
#else
36 36
#define MULH(X,Y) ({ int xxo;                           \
37
    asm (                                               \
37
    __asm__ (                                               \
38 38
        "a1 = %2.H * %1.L (IS,M);\n\t"                  \
39 39
        "a0 = %1.H * %2.H, a1+= %1.H * %2.L (IS,M);\n\t"\
40 40
        "a1 = a1 >>> 16;\n\t"                           \
......
44 44

  
45 45
/* signed 16x16 -> 32 multiply */
46 46
#define MUL16(a, b) ({ int xxo;                         \
47
    asm (                                               \
47
    __asm__ (                                               \
48 48
       "%0 = %1.l*%2.l (is);\n\t"                       \
49 49
       : "=W" (xxo) : "d" (a), "d" (b) : "A1");         \
50 50
    xxo; })
libavcodec/bfin/mpegvideo_bfin.c
88 88
    /*      block[i] = level;                                 */
89 89
    /*  } */
90 90

  
91
    asm volatile
91
    __asm__ volatile
92 92
        ("i2=%1;\n\t"
93 93
         "r1=[%1++];                                                         \n\t"
94 94
         "r0=r1>>>15 (v);                                                    \n\t"
......
114 114

  
115 115
    PROF("zzscan",5);
116 116

  
117
    asm volatile
117
    __asm__ volatile
118 118
        ("r0=b[%1--] (x);         \n\t"
119 119
         "lsetup (0f,1f) lc0=%3;  \n\t"     /*    for(i=63; i>=start_i; i--) { */
120 120
         "0: p0=r0;               \n\t"     /*        j = scantable[i];        */
libavcodec/bitstream.h
55 55
#if defined(ARCH_X86)
56 56
// avoid +32 for shift optimization (gcc should do that ...)
57 57
static inline  int32_t NEG_SSR32( int32_t a, int8_t s){
58
    asm ("sarl %1, %0\n\t"
58
    __asm__ ("sarl %1, %0\n\t"
59 59
         : "+r" (a)
60 60
         : "ic" ((uint8_t)(-s))
61 61
    );
62 62
    return a;
63 63
}
64 64
static inline uint32_t NEG_USR32(uint32_t a, int8_t s){
65
    asm ("shrl %1, %0\n\t"
65
    __asm__ ("shrl %1, %0\n\t"
66 66
         : "+r" (a)
67 67
         : "ic" ((uint8_t)(-s))
68 68
    );
......
248 248
{
249 249
#    ifdef ALIGNED_BITSTREAM_WRITER
250 250
#        if defined(ARCH_X86)
251
    asm volatile(
251
    __asm__ volatile(
252 252
        "movl %0, %%ecx                 \n\t"
253 253
        "xorl %%eax, %%eax              \n\t"
254 254
        "shrdl %%cl, %1, %%eax          \n\t"
......
279 279
#        endif
280 280
#    else //ALIGNED_BITSTREAM_WRITER
281 281
#        if defined(ARCH_X86)
282
    asm volatile(
282
    __asm__ volatile(
283 283
        "movl $7, %%ecx                 \n\t"
284 284
        "andl %0, %%ecx                 \n\t"
285 285
        "addl %3, %%ecx                 \n\t"
......
556 556

  
557 557
#if defined(ARCH_X86)
558 558
#   define SKIP_CACHE(name, gb, num)\
559
        asm(\
559
        __asm__(\
560 560
            "shldl %2, %1, %0          \n\t"\
561 561
            "shll %2, %1               \n\t"\
562 562
            : "+r" (name##_cache0), "+r" (name##_cache1)\
libavcodec/cabac.h
304 304
    int temp;
305 305
#if 0
306 306
    //P3:683    athlon:475
307
    asm(
307
    __asm__(
308 308
        "lea -0x100(%0), %2         \n\t"
309 309
        "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64
310 310
        "shl %%cl, %0               \n\t"
......
313 313
    );
314 314
#elif 0
315 315
    //P3:680    athlon:474
316
    asm(
316
    __asm__(
317 317
        "cmp $0x100, %0             \n\t"
318 318
        "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64
319 319
        "shl %%cl, %0               \n\t"
......
323 323
#elif 1
324 324
    int temp2;
325 325
    //P3:665    athlon:517
326
    asm(
326
    __asm__(
327 327
        "lea -0x100(%0), %%eax      \n\t"
328 328
        "cltd                       \n\t"
329 329
        "mov %0, %%eax              \n\t"
......
336 336
#elif 0
337 337
    int temp2;
338 338
    //P3:673    athlon:509
339
    asm(
339
    __asm__(
340 340
        "cmp $0x100, %0             \n\t"
341 341
        "sbb %%edx, %%edx           \n\t"
342 342
        "mov %0, %%eax              \n\t"
......
349 349
#else
350 350
    int temp2;
351 351
    //P3:677    athlon:511
352
    asm(
352
    __asm__(
353 353
        "cmp $0x100, %0             \n\t"
354 354
        "lea (%0, %0), %%eax        \n\t"
355 355
        "lea (%1, %1), %%edx        \n\t"
......
385 385
    int bit;
386 386

  
387 387
#ifndef BRANCHLESS_CABAC_DECODER
388
    asm volatile(
388
    __asm__ volatile(
389 389
        "movzbl (%1), %0                        \n\t"
390 390
        "movl "RANGE    "(%2), %%ebx            \n\t"
391 391
        "movl "RANGE    "(%2), %%edx            \n\t"
......
524 524
        "add    "tmp"       , "low"                                     \n\t"\
525 525
        "1:                                                             \n\t"
526 526

  
527
    asm volatile(
527
    __asm__ volatile(
528 528
        "movl "RANGE    "(%2), %%esi            \n\t"
529 529
        "movl "LOW      "(%2), %%ebx            \n\t"
530 530
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
......
591 591
static int av_unused get_cabac_bypass(CABACContext *c){
592 592
#if 0 //not faster
593 593
    int bit;
594
    asm volatile(
594
    __asm__ volatile(
595 595
        "movl "RANGE    "(%1), %%ebx            \n\t"
596 596
        "movl "LOW      "(%1), %%eax            \n\t"
597 597
        "shl $17, %%ebx                         \n\t"
......
638 638

  
639 639
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
640 640
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
641
    asm volatile(
641
    __asm__ volatile(
642 642
        "movl "RANGE    "(%1), %%ebx            \n\t"
643 643
        "movl "LOW      "(%1), %%eax            \n\t"
644 644
        "shl $17, %%ebx                         \n\t"
libavcodec/dct-test.c
177 177
{
178 178
#ifdef HAVE_MMX
179 179
    if (cpu_flags & MM_MMX)
180
        asm volatile ("emms\n\t");
180
        __asm__ volatile ("emms\n\t");
181 181
#endif
182 182
}
183 183

  
libavcodec/dsputil.h
579 579

  
580 580
static inline void emms(void)
581 581
{
582
    asm volatile ("emms;":::"memory");
582
    __asm__ volatile ("emms;":::"memory");
583 583
}
584 584

  
585 585

  
libavcodec/i386/cavsdsp_mmx.c
35 35

  
36 36
static inline void cavs_idct8_1d(int16_t *block, uint64_t bias)
37 37
{
38
    asm volatile(
38
    __asm__ volatile(
39 39
        "movq 112(%0), %%mm4  \n\t" /* mm4 = src7 */
40 40
        "movq  16(%0), %%mm5  \n\t" /* mm5 = src1 */
41 41
        "movq  80(%0), %%mm2  \n\t" /* mm2 = src5 */
......
120 120

  
121 121
        cavs_idct8_1d(block+4*i, ff_pw_4);
122 122

  
123
        asm volatile(
123
        __asm__ volatile(
124 124
            "psraw     $3, %%mm7  \n\t"
125 125
            "psraw     $3, %%mm6  \n\t"
126 126
            "psraw     $3, %%mm5  \n\t"
......
150 150
    for(i=0; i<2; i++){
151 151
        cavs_idct8_1d(b2+4*i, ff_pw_64);
152 152

  
153
        asm volatile(
153
        __asm__ volatile(
154 154
            "psraw     $7, %%mm7  \n\t"
155 155
            "psraw     $7, %%mm6  \n\t"
156 156
            "psraw     $7, %%mm5  \n\t"
......
175 175
    add_pixels_clamped_mmx(b2, dst, stride);
176 176

  
177 177
    /* clear block */
178
    asm volatile(
178
    __asm__ volatile(
179 179
            "pxor %%mm7, %%mm7   \n\t"
180 180
            "movq %%mm7, (%0)    \n\t"
181 181
            "movq %%mm7, 8(%0)   \n\t"
......
275 275
    src -= 2*srcStride;\
276 276
    \
277 277
    while(w--){\
278
      asm volatile(\
278
      __asm__ volatile(\
279 279
        "pxor %%mm7, %%mm7          \n\t"\
280 280
        "movd (%0), %%mm0           \n\t"\
281 281
        "add %2, %0                 \n\t"\
......
306 306
        : "memory"\
307 307
     );\
308 308
     if(h==16){\
309
        asm volatile(\
309
        __asm__ volatile(\
310 310
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
311 311
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP)\
312 312
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP)\
......
328 328
#define QPEL_CAVS(OPNAME, OP, MMX)\
329 329
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
330 330
    int h=8;\
331
    asm volatile(\
331
    __asm__ volatile(\
332 332
        "pxor %%mm7, %%mm7          \n\t"\
333 333
        "movq %5, %%mm6             \n\t"\
334 334
        "1:                         \n\t"\
libavcodec/i386/cpuid.c
28 28

  
29 29
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
30 30
#define cpuid(index,eax,ebx,ecx,edx)\
31
    asm volatile\
31
    __asm__ volatile\
32 32
        ("mov %%"REG_b", %%"REG_S"\n\t"\
33 33
         "cpuid\n\t"\
34 34
         "xchg %%"REG_b", %%"REG_S\
......
44 44
    int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
45 45
    x86_reg a, c;
46 46

  
47
    asm volatile (
47
    __asm__ volatile (
48 48
        /* See if CPUID instruction is supported ... */
49 49
        /* ... Get copies of EFLAGS into eax and ecx */
50 50
        "pushf\n\t"
libavcodec/i386/dsputil_h264_template_mmx.c
47 47

  
48 48
        rnd_reg = rnd ? &ff_pw_4 : &ff_pw_3;
49 49

  
50
        asm volatile(
50
        __asm__ volatile(
51 51
            "movd %0, %%mm5\n\t"
52 52
            "movq %1, %%mm4\n\t"
53 53
            "movq %2, %%mm6\n\t"         /* mm6 = rnd */
......
58 58
            :: "rm"(x+y), "m"(ff_pw_8), "m"(*rnd_reg));
59 59

  
60 60
        for(i=0; i<h; i++) {
61
            asm volatile(
61
            __asm__ volatile(
62 62
                /* mm0 = src[0..7], mm1 = src[1..8] */
63 63
                "movq %0, %%mm0\n\t"
64 64
                "movq %1, %%mm2\n\t"
65 65
                :: "m"(src[0]), "m"(src[dxy]));
66 66

  
67
            asm volatile(
67
            __asm__ volatile(
68 68
                /* [mm0,mm1] = A * src[0..7] */
69 69
                /* [mm2,mm3] = B * src[1..8] */
70 70
                "movq %%mm0, %%mm1\n\t"
......
98 98

  
99 99
    /* general case, bilinear */
100 100
    rnd_reg = rnd ? &ff_pw_32.a : &ff_pw_28.a;
101
    asm volatile("movd %2, %%mm4\n\t"
101
    __asm__ volatile("movd %2, %%mm4\n\t"
102 102
                 "movd %3, %%mm6\n\t"
103 103
                 "punpcklwd %%mm4, %%mm4\n\t"
104 104
                 "punpcklwd %%mm6, %%mm6\n\t"
......
119 119
                 "movq %%mm4, %0\n\t"
120 120
                 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
121 121

  
122
    asm volatile(
122
    __asm__ volatile(
123 123
        /* mm0 = src[0..7], mm1 = src[1..8] */
124 124
        "movq %0, %%mm0\n\t"
125 125
        "movq %1, %%mm1\n\t"
......
128 128
    for(i=0; i<h; i++) {
129 129
        src += stride;
130 130

  
131
        asm volatile(
131
        __asm__ volatile(
132 132
            /* mm2 = A * src[0..3] + B * src[1..4] */
133 133
            /* mm3 = A * src[4..7] + B * src[5..8] */
134 134
            "movq %%mm0, %%mm2\n\t"
......
145 145
            "paddw %%mm0, %%mm3\n\t"
146 146
            : : "m" (AA));
147 147

  
148
        asm volatile(
148
        __asm__ volatile(
149 149
            /* [mm2,mm3] += C * src[0..7] */
150 150
            "movq %0, %%mm0\n\t"
151 151
            "movq %%mm0, %%mm1\n\t"
......
157 157
            "paddw %%mm1, %%mm3\n\t"
158 158
            : : "m" (src[0]));
159 159

  
160
        asm volatile(
160
        __asm__ volatile(
161 161
            /* [mm2,mm3] += D * src[1..8] */
162 162
            "movq %1, %%mm1\n\t"
163 163
            "movq %%mm1, %%mm0\n\t"
......
171 171
            "movq %0, %%mm0\n\t"
172 172
            : : "m" (src[0]), "m" (src[1]), "m" (DD));
173 173

  
174
        asm volatile(
174
        __asm__ volatile(
175 175
            /* dst[0..7] = ([mm2,mm3] + 32) >> 6 */
176 176
            "paddw %1, %%mm2\n\t"
177 177
            "paddw %1, %%mm3\n\t"
......
187 187

  
188 188
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
189 189
{
190
    asm volatile(
190
    __asm__ volatile(
191 191
        "pxor   %%mm7, %%mm7        \n\t"
192 192
        "movd %5, %%mm2             \n\t"
193 193
        "movd %6, %%mm3             \n\t"
......
259 259
    int tmp = ((1<<16)-1)*x + 8;
260 260
    int CD= tmp*y;
261 261
    int AB= (tmp<<3) - CD;
262
    asm volatile(
262
    __asm__ volatile(
263 263
        /* mm5 = {A,B,A,B} */
264 264
        /* mm6 = {C,D,C,D} */
265 265
        "movd %0, %%mm5\n\t"
......
274 274
        :: "r"(AB), "r"(CD), "m"(src[0]));
275 275

  
276 276

  
277
    asm volatile(
277
    __asm__ volatile(
278 278
        "1:\n\t"
279 279
        "add %4, %1\n\t"
280 280
        /* mm1 = A * src[0,1] + B * src[1,2] */
libavcodec/i386/dsputil_h264_template_ssse3.c
37 37
    if(y==0 || x==0)
38 38
    {
39 39
        /* 1 dimensional filter only */
40
        asm volatile(
40
        __asm__ volatile(
41 41
            "movd %0, %%xmm7 \n\t"
42 42
            "movq %1, %%xmm6 \n\t"
43 43
            "pshuflw $0, %%xmm7, %%xmm7 \n\t"
......
47 47
        );
48 48

  
49 49
        if(x) {
50
            asm volatile(
50
            __asm__ volatile(
51 51
                "1: \n\t"
52 52
                "movq (%1), %%xmm0 \n\t"
53 53
                "movq 1(%1), %%xmm1 \n\t"
......
75 75
                :"r"((x86_reg)stride)
76 76
            );
77 77
        } else {
78
            asm volatile(
78
            __asm__ volatile(
79 79
                "1: \n\t"
80 80
                "movq (%1), %%xmm0 \n\t"
81 81
                "movq (%1,%3), %%xmm1 \n\t"
......
107 107
    }
108 108

  
109 109
    /* general case, bilinear */
110
    asm volatile(
110
    __asm__ volatile(
111 111
        "movd %0, %%xmm7 \n\t"
112 112
        "movd %1, %%xmm6 \n\t"
113 113
        "movdqa %2, %%xmm5 \n\t"
......
118 118
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(*(rnd?&ff_pw_32:&ff_pw_28))
119 119
    );
120 120

  
121
    asm volatile(
121
    __asm__ volatile(
122 122
        "movq (%1), %%xmm0 \n\t"
123 123
        "movq 1(%1), %%xmm1 \n\t"
124 124
        "punpcklbw %%xmm1, %%xmm0 \n\t"
......
160 160

  
161 161
static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 4*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
162 162
{
163
    asm volatile(
163
    __asm__ volatile(
164 164
        "movd %0, %%mm7 \n\t"
165 165
        "movd %1, %%mm6 \n\t"
166 166
        "movq %2, %%mm5 \n\t"
......
169 169
        :: "r"((x*255+8)*(8-y)), "r"((x*255+8)*y), "m"(ff_pw_32)
170 170
    );
171 171

  
172
    asm volatile(
172
    __asm__ volatile(
173 173
        "movd (%1), %%mm0 \n\t"
174 174
        "punpcklbw 1(%1), %%mm0 \n\t"
175 175
        "add %3, %1 \n\t"
libavcodec/i386/dsputil_mmx.c
70 70
DECLARE_ALIGNED_16(const double, ff_pd_1[2]) = { 1.0, 1.0 };
71 71
DECLARE_ALIGNED_16(const double, ff_pd_2[2]) = { 2.0, 2.0 };
72 72

  
73
#define JUMPALIGN() asm volatile (ASMALIGN(3)::)
74
#define MOVQ_ZERO(regd)  asm volatile ("pxor %%" #regd ", %%" #regd ::)
73
#define JUMPALIGN() __asm__ volatile (ASMALIGN(3)::)
74
#define MOVQ_ZERO(regd)  __asm__ volatile ("pxor %%" #regd ", %%" #regd ::)
75 75

  
76 76
#define MOVQ_BFE(regd) \
77
    asm volatile ( \
77
    __asm__ volatile ( \
78 78
    "pcmpeqd %%" #regd ", %%" #regd " \n\t"\
79 79
    "paddb %%" #regd ", %%" #regd " \n\t" ::)
80 80

  
81 81
#ifndef PIC
82
#define MOVQ_BONE(regd)  asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
83
#define MOVQ_WTWO(regd)  asm volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
82
#define MOVQ_BONE(regd)  __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_bone))
83
#define MOVQ_WTWO(regd)  __asm__ volatile ("movq %0, %%" #regd " \n\t" ::"m"(ff_wtwo))
84 84
#else
85 85
// for shared library it's better to use this way for accessing constants
86 86
// pcmpeqd -> -1
87 87
#define MOVQ_BONE(regd) \
88
    asm volatile ( \
88
    __asm__ volatile ( \
89 89
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
90 90
    "psrlw $15, %%" #regd " \n\t" \
91 91
    "packuswb %%" #regd ", %%" #regd " \n\t" ::)
92 92

  
93 93
#define MOVQ_WTWO(regd) \
94
    asm volatile ( \
94
    __asm__ volatile ( \
95 95
    "pcmpeqd %%" #regd ", %%" #regd " \n\t" \
96 96
    "psrlw $15, %%" #regd " \n\t" \
97 97
    "psllw $1, %%" #regd " \n\t"::)
......
223 223
    p = block;
224 224
    pix = pixels;
225 225
    /* unrolled loop */
226
        asm volatile(
226
        __asm__ volatile(
227 227
                "movq   %3, %%mm0               \n\t"
228 228
                "movq   8%3, %%mm1              \n\t"
229 229
                "movq   16%3, %%mm2             \n\t"
......
248 248
    // if here would be an exact copy of the code above
249 249
    // compiler would generate some very strange code
250 250
    // thus using "r"
251
    asm volatile(
251
    __asm__ volatile(
252 252
            "movq       (%3), %%mm0             \n\t"
253 253
            "movq       8(%3), %%mm1            \n\t"
254 254
            "movq       16(%3), %%mm2           \n\t"
......
299 299
    MOVQ_ZERO(mm7);
300 300
    i = 4;
301 301
    do {
302
        asm volatile(
302
        __asm__ volatile(
303 303
                "movq   (%2), %%mm0     \n\t"
304 304
                "movq   8(%2), %%mm1    \n\t"
305 305
                "movq   16(%2), %%mm2   \n\t"
......
330 330

  
331 331
static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
332 332
{
333
    asm volatile(
333
    __asm__ volatile(
334 334
         "lea (%3, %3), %%"REG_a"       \n\t"
335 335
         ASMALIGN(3)
336 336
         "1:                            \n\t"
......
356 356

  
357 357
static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
358 358
{
359
    asm volatile(
359
    __asm__ volatile(
360 360
         "lea (%3, %3), %%"REG_a"       \n\t"
361 361
         ASMALIGN(3)
362 362
         "1:                            \n\t"
......
382 382

  
383 383
static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
384 384
{
385
    asm volatile(
385
    __asm__ volatile(
386 386
         "lea (%3, %3), %%"REG_a"       \n\t"
387 387
         ASMALIGN(3)
388 388
         "1:                            \n\t"
......
416 416

  
417 417
static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
418 418
{
419
    asm volatile(
419
    __asm__ volatile(
420 420
         "1:                            \n\t"
421 421
         "movdqu (%1), %%xmm0           \n\t"
422 422
         "movdqu (%1,%3), %%xmm1        \n\t"
......
438 438

  
439 439
static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h)
440 440
{
441
    asm volatile(
441
    __asm__ volatile(
442 442
         "1:                            \n\t"
443 443
         "movdqu (%1), %%xmm0           \n\t"
444 444
         "movdqu (%1,%3), %%xmm1        \n\t"
......
464 464

  
465 465
static void clear_blocks_mmx(DCTELEM *blocks)
466 466
{
467
    asm volatile(
467
    __asm__ volatile(
468 468
                "pxor %%mm7, %%mm7              \n\t"
469 469
                "mov $-128*6, %%"REG_a"         \n\t"
470 470
                "1:                             \n\t"
......
481 481

  
482 482
static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
483 483
    x86_reg i=0;
484
    asm volatile(
484
    __asm__ volatile(
485 485
        "jmp 2f                         \n\t"
486 486
        "1:                             \n\t"
487 487
        "movq  (%1, %0), %%mm0          \n\t"
......
505 505

  
506 506
static void add_bytes_l2_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
507 507
    x86_reg i=0;
508
    asm volatile(
508
    __asm__ volatile(
509 509
        "jmp 2f                         \n\t"
510 510
        "1:                             \n\t"
511 511
        "movq   (%2, %0), %%mm0         \n\t"
......
600 600
    if(ENABLE_ANY_H263) {
601 601
    const int strength= ff_h263_loop_filter_strength[qscale];
602 602

  
603
    asm volatile(
603
    __asm__ volatile(
604 604

  
605 605
        H263_LOOP_FILTER
606 606

  
......
618 618
}
619 619

  
620 620
static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
621
    asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
621
    __asm__ volatile( //FIXME could save 1 instruction if done as 8x4 ...
622 622
        "movd  %4, %%mm0                \n\t"
623 623
        "movd  %5, %%mm1                \n\t"
624 624
        "movd  %6, %%mm2                \n\t"
......
656 656

  
657 657
    transpose4x4(btemp  , src           , 8, stride);
658 658
    transpose4x4(btemp+4, src + 4*stride, 8, stride);
659
    asm volatile(
659
    __asm__ volatile(
660 660
        H263_LOOP_FILTER // 5 3 4 6
661 661

  
662 662
        : "+m" (temp[0]),
......
666 666
        : "g" (2*strength), "m"(ff_pb_FC)
667 667
    );
668 668

  
669
    asm volatile(
669
    __asm__ volatile(
670 670
        "movq %%mm5, %%mm1              \n\t"
671 671
        "movq %%mm4, %%mm0              \n\t"
672 672
        "punpcklbw %%mm3, %%mm5         \n\t"
......
711 711
    ptr = buf;
712 712
    if(w==8)
713 713
    {
714
        asm volatile(
714
        __asm__ volatile(
715 715
                "1:                             \n\t"
716 716
                "movd (%0), %%mm0               \n\t"
717 717
                "punpcklbw %%mm0, %%mm0         \n\t"
......
732 732
    }
733 733
    else
734 734
    {
735
        asm volatile(
735
        __asm__ volatile(
736 736
                "1:                             \n\t"
737 737
                "movd (%0), %%mm0               \n\t"
738 738
                "punpcklbw %%mm0, %%mm0         \n\t"
......
757 757
    for(i=0;i<w;i+=4) {
758 758
        /* top and bottom (and hopefully also the corners) */
759 759
        ptr= buf - (i + 1) * wrap - w;
760
        asm volatile(
760
        __asm__ volatile(
761 761
                "1:                             \n\t"
762 762
                "movq (%1, %0), %%mm0           \n\t"
763 763
                "movq %%mm0, (%0)               \n\t"
......
771 771
                : "r" ((x86_reg)buf - (x86_reg)ptr - w), "r" ((x86_reg)-wrap), "r" ((x86_reg)-wrap*3), "r" (ptr+width+2*w)
772 772
        );
773 773
        ptr= last_line + (i + 1) * wrap - w;
774
        asm volatile(
774
        __asm__ volatile(
775 775
                "1:                             \n\t"
776 776
                "movq (%1, %0), %%mm0           \n\t"
777 777
                "movq %%mm0, (%0)               \n\t"
......
792 792
{\
793 793
    x86_reg i = -bpp;\
794 794
    x86_reg end = w-3;\
795
    asm volatile(\
795
    __asm__ volatile(\
796 796
        "pxor      %%mm7, %%mm7 \n"\
797 797
        "movd    (%1,%0), %%mm0 \n"\
798 798
        "movd    (%2,%0), %%mm1 \n"\
......
886 886
static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
887 887
    uint64_t temp;\
888 888
\
889
    asm volatile(\
889
    __asm__ volatile(\
890 890
        "pxor %%mm7, %%mm7                \n\t"\
891 891
        "1:                               \n\t"\
892 892
        "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\
......
1025 1025
        temp[13]= (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]);\
1026 1026
        temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
1027 1027
        temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
1028
        asm volatile(\
1028
        __asm__ volatile(\
1029 1029
            "movq (%0), %%mm0               \n\t"\
1030 1030
            "movq 8(%0), %%mm1              \n\t"\
1031 1031
            "paddw %2, %%mm0                \n\t"\
......
1051 1051
}\
1052 1052
\
1053 1053
static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1054
    asm volatile(\
1054
    __asm__ volatile(\
1055 1055
        "pxor %%mm7, %%mm7                \n\t"\
1056 1056
        "1:                               \n\t"\
1057 1057
        "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\
......
1128 1128
        temp[ 5]= (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 8]);\
1129 1129
        temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
1130 1130
        temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
1131
        asm volatile(\
1131
        __asm__ volatile(\
1132 1132
            "movq (%0), %%mm0           \n\t"\
1133 1133
            "movq 8(%0), %%mm1          \n\t"\
1134 1134
            "paddw %2, %%mm0            \n\t"\
......
1153 1153
    int count= 17;\
1154 1154
\
1155 1155
    /*FIXME unroll */\
1156
    asm volatile(\
1156
    __asm__ volatile(\
1157 1157
        "pxor %%mm7, %%mm7              \n\t"\
1158 1158
        "1:                             \n\t"\
1159 1159
        "movq (%0), %%mm0               \n\t"\
......
1181 1181
    count=4;\
1182 1182
    \
1183 1183
/*FIXME reorder for speed */\
1184
    asm volatile(\
1184
    __asm__ volatile(\
1185 1185
        /*"pxor %%mm7, %%mm7              \n\t"*/\
1186 1186
        "1:                             \n\t"\
1187 1187
        "movq (%0), %%mm0               \n\t"\
......
1231 1231
    int count= 9;\
1232 1232
\
1233 1233
    /*FIXME unroll */\
1234
    asm volatile(\
1234
    __asm__ volatile(\
1235 1235
        "pxor %%mm7, %%mm7              \n\t"\
1236 1236
        "1:                             \n\t"\
1237 1237
        "movq (%0), %%mm0               \n\t"\
......
1253 1253
    count=2;\
1254 1254
    \
1255 1255
/*FIXME reorder for speed */\
1256
    asm volatile(\
1256
    __asm__ volatile(\
1257 1257
        /*"pxor %%mm7, %%mm7              \n\t"*/\
1258 1258
        "1:                             \n\t"\
1259 1259
        "movq (%0), %%mm0               \n\t"\
......
1620 1620
        src = edge_buf;
1621 1621
    }
1622 1622

  
1623
    asm volatile(
1623
    __asm__ volatile(
1624 1624
        "movd         %0, %%mm6 \n\t"
1625 1625
        "pxor      %%mm7, %%mm7 \n\t"
1626 1626
        "punpcklwd %%mm6, %%mm6 \n\t"
......
1639 1639
                            oys - dyys + dyxs*(x+3) };
1640 1640

  
1641 1641
        for(y=0; y<h; y++){
1642
            asm volatile(
1642
            __asm__ volatile(
1643 1643
                "movq   %0,  %%mm4 \n\t"
1644 1644
                "movq   %1,  %%mm5 \n\t"
1645 1645
                "paddw  %2,  %%mm4 \n\t"
......
1652 1652
                : "m"(*dxy4), "m"(*dyy4)
1653 1653
            );
1654 1654

  
1655
            asm volatile(
1655
            __asm__ volatile(
1656 1656
                "movq   %%mm6, %%mm2 \n\t"
1657 1657
                "movq   %%mm6, %%mm1 \n\t"
1658 1658
                "psubw  %%mm4, %%mm2 \n\t"
......
1701 1701
static void name(void *mem, int stride, int h){\
1702 1702
    const uint8_t *p= mem;\
1703 1703
    do{\
1704
        asm volatile(#op" %0" :: "m"(*p));\
1704
        __asm__ volatile(#op" %0" :: "m"(*p));\
1705 1705
        p+= stride;\
1706 1706
    }while(--h);\
1707 1707
}
......
1787 1787
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
1788 1788
{
1789 1789
    int i;
1790
    asm volatile("pxor %%mm7, %%mm7":);
1790
    __asm__ volatile("pxor %%mm7, %%mm7":);
1791 1791
    for(i=0; i<blocksize; i+=2) {
1792
        asm volatile(
1792
        __asm__ volatile(
1793 1793
            "movq    %0,    %%mm0 \n\t"
1794 1794
            "movq    %1,    %%mm1 \n\t"
1795 1795
            "movq    %%mm0, %%mm2 \n\t"
......
1809 1809
            ::"memory"
1810 1810
        );
1811 1811
    }
1812
    asm volatile("femms");
1812
    __asm__ volatile("femms");
1813 1813
}
1814 1814
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
1815 1815
{
1816 1816
    int i;
1817 1817

  
1818
    asm volatile(
1818
    __asm__ volatile(
1819 1819
            "movaps  %0,     %%xmm5 \n\t"
1820 1820
        ::"m"(ff_pdw_80000000[0])
1821 1821
    );
1822 1822
    for(i=0; i<blocksize; i+=4) {
1823
        asm volatile(
1823
        __asm__ volatile(
1824 1824
            "movaps  %0,     %%xmm0 \n\t"
1825 1825
            "movaps  %1,     %%xmm1 \n\t"
1826 1826
            "xorps   %%xmm2, %%xmm2 \n\t"
......
1846 1846
#define IF0(x)
1847 1847

  
1848 1848
#define MIX5(mono,stereo)\
1849
    asm volatile(\
1849
    __asm__ volatile(\
1850 1850
        "movss          0(%2), %%xmm5 \n"\
1851 1851
        "movss          8(%2), %%xmm6 \n"\
1852 1852
        "movss         24(%2), %%xmm7 \n"\
......
1879 1879
    );
1880 1880

  
1881 1881
#define MIX_MISC(stereo)\
1882
    asm volatile(\
1882
    __asm__ volatile(\
1883 1883
        "1: \n"\
1884 1884
        "movaps  (%3,%0), %%xmm0 \n"\
1885 1885
 stereo("movaps   %%xmm0, %%xmm1 \n")\
......
1919 1919
    } else {
1920 1920
        DECLARE_ALIGNED_16(float, matrix_simd[in_ch][2][4]);
1921 1921
        j = 2*in_ch*sizeof(float);
1922
        asm volatile(
1922
        __asm__ volatile(
1923 1923
            "1: \n"
1924 1924
            "sub $8, %0 \n"
1925 1925
            "movss     (%2,%0), %%xmm6 \n"
......
1943 1943

  
1944 1944
static void vector_fmul_3dnow(float *dst, const float *src, int len){
1945 1945
    x86_reg i = (len-4)*4;
1946
    asm volatile(
1946
    __asm__ volatile(
1947 1947
        "1: \n\t"
1948 1948
        "movq    (%1,%0), %%mm0 \n\t"
1949 1949
        "movq   8(%1,%0), %%mm1 \n\t"
......
1961 1961
}
1962 1962
static void vector_fmul_sse(float *dst, const float *src, int len){
1963 1963
    x86_reg i = (len-8)*4;
1964
    asm volatile(
1964
    __asm__ volatile(
1965 1965
        "1: \n\t"
1966 1966
        "movaps    (%1,%0), %%xmm0 \n\t"
1967 1967
        "movaps  16(%1,%0), %%xmm1 \n\t"
......
1979 1979

  
1980 1980
static void vector_fmul_reverse_3dnow2(float *dst, const float *src0, const float *src1, int len){
1981 1981
    x86_reg i = len*4-16;
1982
    asm volatile(
1982
    __asm__ volatile(
1983 1983
        "1: \n\t"
1984 1984
        "pswapd   8(%1), %%mm0 \n\t"
1985 1985
        "pswapd    (%1), %%mm1 \n\t"
......
1993 1993
        :"+r"(i), "+r"(src1)
1994 1994
        :"r"(dst), "r"(src0)
1995 1995
    );
1996
    asm volatile("femms");
1996
    __asm__ volatile("femms");
1997 1997
}
1998 1998
static void vector_fmul_reverse_sse(float *dst, const float *src0, const float *src1, int len){
1999 1999
    x86_reg i = len*4-32;
2000
    asm volatile(
2000
    __asm__ volatile(
2001 2001
        "1: \n\t"
2002 2002
        "movaps        16(%1), %%xmm0 \n\t"
2003 2003
        "movaps          (%1), %%xmm1 \n\t"
......
2020 2020
    x86_reg i = (len-4)*4;
2021 2021
    if(step == 2 && src3 == 0){
2022 2022
        dst += (len-4)*2;
2023
        asm volatile(
2023
        __asm__ volatile(
2024 2024
            "1: \n\t"
2025 2025
            "movq   (%2,%0),  %%mm0 \n\t"
2026 2026
            "movq  8(%2,%0),  %%mm1 \n\t"
......
2043 2043
        );
2044 2044
    }
2045 2045
    else if(step == 1 && src3 == 0){
2046
        asm volatile(
2046
        __asm__ volatile(
2047 2047
            "1: \n\t"
2048 2048
            "movq    (%2,%0), %%mm0 \n\t"
2049 2049
            "movq   8(%2,%0), %%mm1 \n\t"
......
2062 2062
    }
2063 2063
    else
2064 2064
        ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step);
2065
    asm volatile("femms");
2065
    __asm__ volatile("femms");
2066 2066
}
2067 2067
static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1,
2068 2068
                                    const float *src2, int src3, int len, int step){
2069 2069
    x86_reg i = (len-8)*4;
2070 2070
    if(step == 2 && src3 == 0){
2071 2071
        dst += (len-8)*2;
2072
        asm volatile(
2072
        __asm__ volatile(
2073 2073
            "1: \n\t"
2074 2074
            "movaps   (%2,%0), %%xmm0 \n\t"
2075 2075
            "movaps 16(%2,%0), %%xmm1 \n\t"
......
2100 2100
        );
2101 2101
    }
2102 2102
    else if(step == 1 && src3 == 0){
2103
        asm volatile(
2103
        __asm__ volatile(
2104 2104
            "1: \n\t"
2105 2105
            "movaps   (%2,%0), %%xmm0 \n\t"
2106 2106
            "movaps 16(%2,%0), %%xmm1 \n\t"
......
2127 2127
    if(add_bias == 0){
2128 2128
        x86_reg i = -len*4;
2129 2129
        x86_reg j = len*4-8;
2130
        asm volatile(
2130
        __asm__ volatile(
... This diff was truncated because it exceeds the maximum size that can be displayed.
