Revision 3f87f39c libavcodec/x86/x86inc.asm

View differences:

libavcodec/x86/x86inc.asm
20 20
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 21
;*****************************************************************************
22 22

  
23
%ifdef ARCH_X86_64
24
    %ifidn __OUTPUT_FORMAT__,win32
25
        %define WIN64
26
    %else
27
        %define UNIX64
28
    %endif
29
%endif
30

  
23 31
; FIXME: All of the 64bit asm functions that take a stride as an argument
24 32
; via register, assume that the high dword of that register is filled with 0.
25 33
; This is true in practice (since we never do any 64bit arithmetic on strides,
......
28 36
; Name of the .rodata section.
29 37
; Kludge: Something on OS X fails to align .rodata even given an align attribute,
30 38
; so use a different read-only section.
31
%macro SECTION_RODATA 0
39
%macro SECTION_RODATA 0-1 16
32 40
    %ifidn __OUTPUT_FORMAT__,macho64
33
        SECTION .text align=16
41
        SECTION .text align=%1
34 42
    %elifidn __OUTPUT_FORMAT__,macho
35
        SECTION .text align=16
43
        SECTION .text align=%1
36 44
        fakegot:
37 45
    %else
38
        SECTION .rodata align=16
46
        SECTION .rodata align=%1
39 47
    %endif
40 48
%endmacro
41 49

  
42
; PIC support macros. All these macros are totally harmless when PIC is
43
; not defined but can ruin everything if misused in PIC mode. On x86_32, shared
44
; objects cannot directly access global variables by address, they need to
45
; go through the GOT (global offset table). Most OSes do not care about it
46
; and let you load non-shared .so objects (Linux, Win32...). However, OS X
47
; requires PIC code in its .dylib objects.
48
;
49
; - GLOBAL should be used as a suffix for global addressing, eg.
50
;     picgetgot ebx
50
; PIC support macros.
51
; x86_64 can't fit 64bit address literals in most instruction types,
52
; so shared objects (under the assumption that they might be anywhere
53
; in memory) must use an address mode that does fit.
54
; So all accesses to global variables must use this macro, e.g.
51 55
;     mov eax, [foo GLOBAL]
52 56
;   instead of
53 57
;     mov eax, [foo]
54 58
;
55
; - picgetgot computes the GOT address into the given register in PIC
56
;   mode, otherwise does nothing. You need to do this before using GLOBAL.
57
;   Before in both execution order and compiled code order (so GLOBAL knows
58
;   which register the GOT is in).
59

  
60
%ifndef PIC
61
    %define GLOBAL
62
    %macro picgetgot 1
63
    %endmacro
64
%elifdef ARCH_X86_64
65
    %define PIC64
59
; x86_32 doesn't require PIC.
60
; Some distros prefer shared objects to be PIC, but nothing breaks if
61
; the code contains a few textrels, so we'll skip that complexity.
62

  
63
%ifdef WIN64
64
    %define PIC
65
%elifndef ARCH_X86_64
66
    %undef PIC
67
%endif
68
%ifdef PIC
66 69
    %define GLOBAL wrt rip
67
    %macro picgetgot 1
68
    %endmacro
69 70
%else
70
    %define PIC32
71
    %ifidn __OUTPUT_FORMAT__,macho
72
        ; There is no real global offset table on OS X, but we still
73
        ; need to reference our variables by offset.
74
        %macro picgetgot 1
75
            call %%getgot
76
          %%getgot:
77
            pop %1
78
            add %1, $$ - %%getgot
79
            %undef GLOBAL
80
            %define GLOBAL + %1 - fakegot
81
        %endmacro
82
    %else ; elf
83
        extern _GLOBAL_OFFSET_TABLE_
84
        %macro picgetgot 1
85
            call %%getgot
86
          %%getgot:
87
            pop %1
88
            add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%getgot wrt ..gotpc
89
            %undef GLOBAL
90
            %define GLOBAL + %1 wrt ..gotoff
91
        %endmacro
92
    %endif
71
    %define GLOBAL
93 72
%endif
94 73

  
95 74
; Macros to eliminate most code duplication between x86_32 and x86_64:
......
99 78

  
100 79
; PROLOGUE:
101 80
; %1 = number of arguments. loads them from stack if needed.
102
; %2 = number of registers used, not including PIC. pushes callee-saved regs if needed.
103
; %3 = whether global constants are used in this function. inits x86_32 PIC if needed.
81
; %2 = number of registers used. pushes callee-saved regs if needed.
82
; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed.
104 83
; %4 = list of names to define to registers
105 84
; PROLOGUE can also be invoked by adding the same options to cglobal
106 85

  
107 86
; e.g.
108
; cglobal foo, 2,3,0, dst, src, tmp
109
; declares a function (foo), taking two args (dst and src), one local variable (tmp), and not using globals
87
; cglobal foo, 2,3, dst, src, tmp
88
; declares a function (foo), taking two args (dst and src) and one local variable (tmp)
110 89

  
111 90
; TODO Some functions can use some args directly from the stack. If they're the
112 91
; last args then you can just not declare them, but if they're in the middle
......
119 98
; Same, but if it doesn't pop anything it becomes a 2-byte ret, for athlons
120 99
; which are slow when a normal ret follows a branch.
121 100

  
101
; registers:
102
; rN and rNq are the native-size register holding function argument N
103
; rNd, rNw, rNb are dword, word, and byte size
104
; rNm is the original location of arg N (a register or on the stack), dword
105
; rNmp is native size
106

  
122 107
; DECLARE_REG n, qreg, dreg, wreg, breg, mem
; Defines the family of aliases for argument register number n:
;   %1 = register index N used to build the names rN, rNq, rNd, rNw, rNb, rNm, rNmp
;   %2 = name substituted for rNq and rN (native size)
;   %3 = name substituted for rNd (dword)
;   %4 = name substituted for rNw (word)
;   %5 = name substituted for rNb (byte)
;   %6 = original location of the argument: either a register name or a
;        memory operand such as a stack slot
%macro DECLARE_REG 6
123 108
    %define r%1q %2
124 109
    %define r%1d %3
125 110
    %define r%1w %4
126 111
    %define r%1b %5
127 112
    %define r%1m %6
113
; rNmp is the native-size view of the argument's original location:
; the full register when %6 is an identifier, otherwise %6 with an explicit
; qword (x86_64) or dword (x86_32) size prefix.
    %ifid %6 ; i.e. it's a register
114
        %define r%1mp %2
115
    %elifdef ARCH_X86_64 ; memory
116
        %define r%1mp qword %6
117
    %else
118
        %define r%1mp dword %6
119
    %endif
128 120
    %define r%1  %2
129 121
%endmacro
130 122

  
......
150 142
DECLARE_REG_SIZE di, dil
151 143
DECLARE_REG_SIZE bp, bpl
152 144

  
145
; t# defines for when per-arch register allocation is more complex than just function arguments
146

  
147
; DECLARE_REG_TMP n0, n1, ...
; Takes one or more register indices and binds the temporaries t0, t1, ...
; to them in argument order: the k-th argument N is passed to CAT_XDEFINE
; as "t, k, rN".
; NOTE(review): CAT_XDEFINE is defined elsewhere; presumably it performs
; "%xdefine t<k> rN" - confirm against its definition.
%macro DECLARE_REG_TMP 1-*
148
; %%i counts the temporaries assigned so far (index of the next tN).
    %assign %%i 0
149
    %rep %0
150
        CAT_XDEFINE t, %%i, r%1
151
        %assign %%i %%i+1
152
; Shift the argument list so the next index becomes %1.
        %rotate 1
153
    %endrep
154
%endmacro
155

  
156
; DECLARE_REG_TMP_SIZE n0, n1, ...
; For every index N given, defines the sized names tNq, tNd, tNw and tNb as
; "tN" token-pasted with the matching size suffix.
; These are plain %define (not %xdefine), so they are expanded at the point
; of use; presumably this lets them follow whatever tN is bound to by a
; later DECLARE_REG_TMP - confirm.
%macro DECLARE_REG_TMP_SIZE 0-*
157
    %rep %0
158
        %define t%1q t%1 %+ q
159
        %define t%1d t%1 %+ d
160
        %define t%1w t%1 %+ w
161
        %define t%1b t%1 %+ b
162
; Advance to the next index in the argument list.
        %rotate 1
163
    %endrep
164
%endmacro
165

  
166
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7
167

  
153 168
%ifdef ARCH_X86_64
154 169
    %define gprsize 8
155 170
%else
......
224 239
    %assign n_arg_names %%i
225 240
%endmacro
226 241

  
227
%ifdef ARCH_X86_64 ;==========================================================
228
%ifidn __OUTPUT_FORMAT__,win32
242
%ifdef WIN64 ; Windows x64 ;=================================================
229 243

  
230 244
DECLARE_REG 0, rcx, ecx, cx,  cl,  ecx
231 245
DECLARE_REG 1, rdx, edx, dx,  dl,  edx
......
239 253

  
240 254
%macro LOAD_IF_USED 2 ; reg_id, number_of_args
241 255
    %if %1 < %2
242
        mov r%1, [rsp + 8 + %1*8]
256
        mov r%1, [rsp + stack_offset + 8 + %1*8]
257
    %endif
258
%endmacro
259

  
260
; PROLOGUE (WIN64 variant)
;   %1 = number of function arguments
;   %2 = number of general-purpose registers used (must be >= %1 and <= 7)
;   %3 = number of xmm registers used (optional, treated as 0 when omitted; <= 16)
;   %4 = remaining arguments, forwarded to DEFINE_ARGS as the argument names
; Emits the pushes and xmm spills needed for the registers in use, tracks the
; number of bytes pushed in stack_offset, and loads arguments 4..6 when they
; are declared.
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
261
    ASSERT %2 >= %1
262
    %assign regs_used %2
263
    ASSERT regs_used <= 7
264
    %if %0 > 2
265
        %assign xmm_regs_used %3
266
    %else
267
        %assign xmm_regs_used 0
268
    %endif
269
    ASSERT xmm_regs_used <= 16
270
; r4 and r5 are saved as a pair whenever more than four registers are used;
; RET pops them in the reverse order.
    %if regs_used > 4
271
        push r4
272
        push r5
273
        %assign stack_offset stack_offset+16
274
    %endif
275
; xmm6 and above are spilled to a freshly reserved stack area of
; (xmm_regs_used-6)*16+16 bytes; register xmmN goes to [rsp + (N-6)*16 + 8].
; NOTE(review): the +8 bias and the extra 16 bytes presumably keep the movdqa
; slots 16-byte aligned given rsp is 8 mod 16 at function entry - confirm.
    %if xmm_regs_used > 6
276
        sub rsp, (xmm_regs_used-6)*16+16
277
        %assign stack_offset stack_offset+(xmm_regs_used-6)*16+16
278
        %assign %%i xmm_regs_used
279
        %rep (xmm_regs_used-6)
280
            %assign %%i %%i-1
281
            movdqa [rsp + (%%i-6)*16+8], xmm %+ %%i
282
        %endrep
283
    %endif
284
; Arguments 4, 5 and 6 are loaded only if the function declares that many
; arguments (LOAD_IF_USED compares the index against %1).
    LOAD_IF_USED 4, %1
285
    LOAD_IF_USED 5, %1
286
    LOAD_IF_USED 6, %1
287
    DEFINE_ARGS %4
288
%endmacro
289

  
290
; RESTORE_XMM_INTERNAL base
;   %1 = register holding the address of the xmm save area (RET passes rsp)
; When more than six xmm registers are in use, reloads xmm6..xmm(N-1) from
; [%1 + (i-6)*16 + 8] - the same layout PROLOGUE stores to - and then adds
; the size of the save area, (xmm_regs_used-6)*16+16, to %1.
; Does not modify stack_offset or xmm_regs_used.
%macro RESTORE_XMM_INTERNAL 1
291
    %if xmm_regs_used > 6
292
        %assign %%i xmm_regs_used
293
        %rep (xmm_regs_used-6)
294
            %assign %%i %%i-1
295
            movdqa xmm %+ %%i, [%1 + (%%i-6)*16+8]
296
        %endrep
297
        add %1, (xmm_regs_used-6)*16+16
298
    %endif
299
%endmacro
300

  
301
; RESTORE_XMM base
;   %1 = register holding the address of the xmm save area
; Reloads the xmm registers spilled by PROLOGUE and releases their save area
; (via RESTORE_XMM_INTERNAL), then updates the assembly-time bookkeeping:
; stack_offset is reduced by exactly the amount PROLOGUE added for the spill,
; (xmm_regs_used-6)*16+16, and xmm_regs_used is reset to 0 so that a later
; RET/REP_RET does not restore the registers a second time.
; Unlike RET, this macro neither pops general-purpose registers nor returns.
%macro RESTORE_XMM 1
    RESTORE_XMM_INTERNAL %1
    ; PROLOGUE only reserves the save area (and bumps stack_offset) when more
    ; than six xmm registers are used, so only undo it in that case. The whole
    ; size is parenthesized so that the trailing +16 is subtracted as well.
    %if xmm_regs_used > 6
        %assign stack_offset stack_offset-((xmm_regs_used-6)*16+16)
    %endif
    %assign xmm_regs_used 0
%endmacro
306

  
307
%macro RET 0
308
    RESTORE_XMM_INTERNAL rsp
309
    %if regs_used > 4
310
        pop r5
311
        pop r4
243 312
    %endif
313
    ret
244 314
%endmacro
245 315

  
246
%else ;=======================================================================
316
; REP_RET (WIN64 variant)
; Return from the function. If the epilogue has work to do (more than four
; general-purpose registers or more than six xmm registers in use), this
; expands to the full RET macro; otherwise it emits the two-byte "rep ret"
; form.
%macro REP_RET 0
317
    %if regs_used > 4 || xmm_regs_used > 6
318
        RET
319
    %else
320
        rep ret
321
    %endif
322
%endmacro
323

  
324
%elifdef ARCH_X86_64 ; *nix x64 ;=============================================
247 325

  
248 326
DECLARE_REG 0, rdi, edi, di,  dil, edi
249 327
DECLARE_REG 1, rsi, esi, si,  sil, esi
......
261 339
    %endif
262 340
%endmacro
263 341

  
264
%endif ; !WIN64
265

  
266
%macro PROLOGUE 2-4+ 0 ; #args, #regs, pic, arg_names...
342
%macro PROLOGUE 2-4+ ; #args, #regs, #xmm_regs, arg_names...
267 343
    ASSERT %2 >= %1
268 344
    ASSERT %2 <= 7
269
    %assign stack_offset 0
270
%ifidn __OUTPUT_FORMAT__,win32
271
    LOAD_IF_USED 4, %1
272
    LOAD_IF_USED 5, %1
273
%endif
274 345
    LOAD_IF_USED 6, %1
275 346
    DEFINE_ARGS %4
276 347
%endmacro
......
315 386
    %endif
316 387
%endmacro
317 388

  
318
%macro PROLOGUE 2-4+ 0 ; #args, #regs, pic, arg_names...
389
%macro PROLOGUE 2-4+ ; #args, #regs, arg_names...
319 390
    ASSERT %2 >= %1
320
    %assign stack_offset 0
321 391
    %assign regs_used %2
322
    %ifdef PIC
323
    %if %3
324
        %assign regs_used regs_used+1
325
    %endif
326
    %endif
327 392
    ASSERT regs_used <= 7
328 393
    PUSH_IF_USED 3
329 394
    PUSH_IF_USED 4
......
336 401
    LOAD_IF_USED 4, %1
337 402
    LOAD_IF_USED 5, %1
338 403
    LOAD_IF_USED 6, %1
339
    %if %3
340
        picgetgot r%2
341
    %endif
342 404
    DEFINE_ARGS %4
343 405
%endmacro
344 406

  
......
382 444
    align function_align
383 445
    %1:
384 446
    RESET_MM_PERMUTATION ; not really needed, but makes disassembly somewhat nicer
447
    %assign stack_offset 0
385 448
    %if %0 > 1
386 449
        PROLOGUE %2
387 450
    %endif
......
389 452

  
390 453
%macro cextern 1
391 454
    %ifdef PREFIX
392
        extern _%1
393
        %define %1 _%1
394
    %else
395
        extern %1
455
        %xdefine %1 _%1
396 456
    %endif
457
    extern %1
397 458
%endmacro
398 459

  
399 460
; This is needed for ELF, otherwise the GNU linker assumes the stack is
......
523 584
    %assign %%i 0
524 585
    %rep num_mmregs
525 586
    CAT_XDEFINE m, %%i, %1_m %+ %%i
587
    CAT_XDEFINE n, m %+ %%i, %%i
526 588
    %assign %%i %%i+1
527 589
    %endrep
528 590
%endmacro
......
534 596
    %endif
535 597
%endmacro
536 598

  
537
; substitutions which are functionally identical but reduce code size
599
;Substitutions that reduce instruction size but are functionally equivalent
538 600
%define movdqa movaps
539 601
%define movdqu movups
540 602

  
603
; add dst, src
; Wrapper around the add instruction. When src is the numeric literal 128 it
; emits "sub dst, -128" instead, which leaves the same value in dst; -128 fits
; in a sign-extended 8-bit immediate whereas +128 does not, so the encoding is
; shorter. Every other operand is passed through to a plain add.
; NOTE(review): the carry flag after "sub dst, -128" is not the same as after
; "add dst, 128"; code that consumes CF after an add of 128 should be checked.
%macro add 2
604
; %ifnum keeps the numeric comparison away from register/symbol operands.
    %ifnum %2
605
        %if %2==128
606
            sub %1, -128
607
        %else
608
            add %1, %2
609
        %endif
610
    %else
611
        add %1, %2
612
    %endif
613
%endmacro
614

  
615
; sub dst, src
; Wrapper around the sub instruction. When src is the numeric literal 128 it
; emits "add dst, -128" instead, which leaves the same value in dst; -128 fits
; in a sign-extended 8-bit immediate whereas +128 does not, so the encoding is
; shorter. Every other operand is passed through to a plain sub.
; NOTE(review): the carry flag after "add dst, -128" is not the same as after
; "sub dst, 128"; code that consumes CF after a sub of 128 should be checked.
%macro sub 2
616
; %ifnum keeps the numeric comparison away from register/symbol operands.
    %ifnum %2
617
        %if %2==128
618
            add %1, -128
619
        %else
620
            sub %1, %2
621
        %endif
622
    %else
623
        sub %1, %2
624
    %endif
625
%endmacro

Also available in: Unified diff