summaryrefslogtreecommitdiff
path: root/src/x86/win64.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/x86/win64.S')
-rw-r--r--src/x86/win64.S460
1 files changed, 460 insertions, 0 deletions
diff --git a/src/x86/win64.S b/src/x86/win64.S
new file mode 100644
index 0000000..6e91818
--- /dev/null
+++ b/src/x86/win64.S
@@ -0,0 +1,460 @@
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+/* Constants for ffi_call_win64 */
+#define STACK 0
+#define PREP_ARGS_FN 32
+#define ECIF 40
+#define CIF_BYTES 48
+#define CIF_FLAGS 56
+#define RVALUE 64
+#define FN 72
+
+/* ffi_call_win64 (void (*prep_args_fn)(char *, extended_cif *),
+ extended_cif *ecif, unsigned bytes, unsigned flags,
+ unsigned *rvalue, void (*fn)());
+ */
+
+#ifdef _MSC_VER
+PUBLIC ffi_call_win64
+
+EXTRN __chkstk:NEAR
+EXTRN ffi_closure_win64_inner:NEAR
+
+_TEXT SEGMENT
+
+;;; ffi_closure_win64 will be called with these registers set:
+;;; rax points to 'closure'
+;;; r11 contains a bit mask that specifies which of the
+;;; first four parameters are float or double
+;;;
+;;; It must move the parameters passed in registers to their stack location,
+;;; call ffi_closure_win64_inner for the actual work, then return the result.
+;;;
+ffi_closure_win64 PROC FRAME
+ ;; copy register arguments onto stack
+ test r11, 1
+ jne first_is_float
+ mov QWORD PTR [rsp+8], rcx
+ jmp second
+first_is_float:
+ movlpd QWORD PTR [rsp+8], xmm0
+
+second:
+ test r11, 2
+ jne second_is_float
+ mov QWORD PTR [rsp+16], rdx
+ jmp third
+second_is_float:
+ movlpd QWORD PTR [rsp+16], xmm1
+
+third:
+ test r11, 4
+ jne third_is_float
+ mov QWORD PTR [rsp+24], r8
+ jmp fourth
+third_is_float:
+ movlpd QWORD PTR [rsp+24], xmm2
+
+fourth:
+ test r11, 8
+ jne fourth_is_float
+ mov QWORD PTR [rsp+32], r9
+ jmp done
+fourth_is_float:
+ movlpd QWORD PTR [rsp+32], xmm3
+
+done:
+ .ALLOCSTACK 40
+ sub rsp, 40
+ .ENDPROLOG
+ mov rcx, rax ; context is first parameter
+ mov rdx, rsp ; stack is second parameter
+ add rdx, 48 ; point to start of arguments
+ mov rax, ffi_closure_win64_inner
+ call rax ; call the real closure function
+ add rsp, 40
+ movd xmm0, rax ; If the closure returned a float,
+ ; ffi_closure_win64_inner wrote it to rax
+ ret 0
+ffi_closure_win64 ENDP
+
+ffi_call_win64 PROC FRAME
+ ;; copy registers onto stack
+ mov QWORD PTR [rsp+32], r9
+ mov QWORD PTR [rsp+24], r8
+ mov QWORD PTR [rsp+16], rdx
+ mov QWORD PTR [rsp+8], rcx
+ .PUSHREG rbp
+ push rbp
+ .ALLOCSTACK 48
+ sub rsp, 48 ; 00000030H
+ .SETFRAME rbp, 32
+ lea rbp, QWORD PTR [rsp+32]
+ .ENDPROLOG
+
+ mov eax, DWORD PTR CIF_BYTES[rbp]
+ add rax, 15
+ and rax, -16
+ call __chkstk
+ sub rsp, rax
+ lea rax, QWORD PTR [rsp+32]
+ mov QWORD PTR STACK[rbp], rax
+
+ mov rdx, QWORD PTR ECIF[rbp]
+ mov rcx, QWORD PTR STACK[rbp]
+ call QWORD PTR PREP_ARGS_FN[rbp]
+
+ mov rsp, QWORD PTR STACK[rbp]
+
+ movlpd xmm3, QWORD PTR [rsp+24]
+ movd r9, xmm3
+
+ movlpd xmm2, QWORD PTR [rsp+16]
+ movd r8, xmm2
+
+ movlpd xmm1, QWORD PTR [rsp+8]
+ movd rdx, xmm1
+
+ movlpd xmm0, QWORD PTR [rsp]
+ movd rcx, xmm0
+
+ call QWORD PTR FN[rbp]
+ret_struct4b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_4B
+ jne ret_struct2b$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov DWORD PTR [rcx], eax
+ jmp ret_void$
+
+ret_struct2b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_2B
+ jne ret_struct1b$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov WORD PTR [rcx], ax
+ jmp ret_void$
+
+ret_struct1b$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SMALL_STRUCT_1B
+ jne ret_uint8$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov BYTE PTR [rcx], al
+ jmp ret_void$
+
+ret_uint8$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT8
+ jne ret_sint8$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movzx rax, al
+ mov QWORD PTR [rcx], rax
+ jmp ret_void$
+
+ret_sint8$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT8
+ jne ret_uint16$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movsx rax, al
+ mov QWORD PTR [rcx], rax
+ jmp ret_void$
+
+ret_uint16$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT16
+ jne ret_sint16$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movzx rax, ax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_sint16$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT16
+ jne ret_uint32$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ movsx rax, ax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_uint32$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_UINT32
+ jne ret_sint32$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov eax, eax
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_sint32$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT32
+ jne ret_float$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ cdqe
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_float$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_FLOAT
+ jne SHORT ret_double$
+
+ mov rax, QWORD PTR RVALUE[rbp]
+ movss DWORD PTR [rax], xmm0
+ jmp SHORT ret_void$
+
+ret_double$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_DOUBLE
+ jne SHORT ret_sint64$
+
+ mov rax, QWORD PTR RVALUE[rbp]
+ movlpd QWORD PTR [rax], xmm0
+ jmp SHORT ret_void$
+
+ret_sint64$:
+ cmp DWORD PTR CIF_FLAGS[rbp], FFI_TYPE_SINT64
+ jne ret_void$
+
+ mov rcx, QWORD PTR RVALUE[rbp]
+ mov QWORD PTR [rcx], rax
+ jmp SHORT ret_void$
+
+ret_void$:
+ xor rax, rax
+
+ lea rsp, QWORD PTR [rbp+16]
+ pop rbp
+ ret 0
+ffi_call_win64 ENDP
+_TEXT ENDS
+END
+#else
+.text
+
+.extern _ffi_closure_win64_inner
+
+# ffi_closure_win64 will be called with these registers set:
+# rax points to 'closure'
+# r11 contains a bit mask that specifies which of the
+# first four parameters are float or double
+#
+# It must move the parameters passed in registers to their stack location,
+# call ffi_closure_win64_inner for the actual work, then return the result.
+#
+ .balign 16
+ .globl _ffi_closure_win64
+_ffi_closure_win64:
+ # copy register arguments onto stack
+ test $1,%r11
+ jne .Lfirst_is_float
+ mov %rcx, 8(%rsp)
+ jmp .Lsecond
+.Lfirst_is_float:
+ movlpd %xmm0, 8(%rsp)
+
+.Lsecond:
+ test $2, %r11
+ jne .Lsecond_is_float
+ mov %rdx, 16(%rsp)
+ jmp .Lthird
+.Lsecond_is_float:
+ movlpd %xmm1, 16(%rsp)
+
+.Lthird:
+ test $4, %r11
+ jne .Lthird_is_float
+ mov %r8,24(%rsp)
+ jmp .Lfourth
+.Lthird_is_float:
+ movlpd %xmm2, 24(%rsp)
+
+.Lfourth:
+ test $8, %r11
+ jne .Lfourth_is_float
+ mov %r9, 32(%rsp)
+ jmp .Ldone
+.Lfourth_is_float:
+ movlpd %xmm3, 32(%rsp)
+
+.Ldone:
+#.ALLOCSTACK 40
+ sub $40, %rsp
+#.ENDPROLOG
+ mov %rax, %rcx # context is first parameter
+ mov %rsp, %rdx # stack is second parameter
+ add $48, %rdx # point to start of arguments
+ mov $_ffi_closure_win64_inner, %rax
+ callq *%rax # call the real closure function
+ add $40, %rsp
+ movq %rax, %xmm0 # If the closure returned a float,
+ # ffi_closure_win64_inner wrote it to rax
+ retq
+.ffi_closure_win64_end:
+
+ .balign 16
+ .globl _ffi_call_win64
+_ffi_call_win64:
+ # copy registers onto stack
+ mov %r9,32(%rsp)
+ mov %r8,24(%rsp)
+ mov %rdx,16(%rsp)
+ mov %rcx,8(%rsp)
+ #.PUSHREG rbp
+ push %rbp
+ #.ALLOCSTACK 48
+ sub $48,%rsp
+ #.SETFRAME rbp, 32
+ lea 32(%rsp),%rbp
+ #.ENDPROLOG
+
+ mov CIF_BYTES(%rbp),%eax
+ add $15, %rax
+ and $-16, %rax
+ cmpq $0x1000, %rax
+ jb Lch_done
+Lch_probe:
+ subq $0x1000,%rsp
+ orl $0x0, (%rsp)
+ subq $0x1000,%rax
+ cmpq $0x1000,%rax
+ ja Lch_probe
+Lch_done:
+ subq %rax, %rsp
+ orl $0x0, (%rsp)
+ lea 32(%rsp), %rax
+ mov %rax, STACK(%rbp)
+
+ mov ECIF(%rbp), %rdx
+ mov STACK(%rbp), %rcx
+ callq *PREP_ARGS_FN(%rbp)
+
+ mov STACK(%rbp), %rsp
+
+ movlpd 24(%rsp), %xmm3
+ movd %xmm3, %r9
+
+ movlpd 16(%rsp), %xmm2
+ movd %xmm2, %r8
+
+ movlpd 8(%rsp), %xmm1
+ movd %xmm1, %rdx
+
+ movlpd (%rsp), %xmm0
+ movd %xmm0, %rcx
+
+ callq *FN(%rbp)
+.Lret_struct4b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_4B, CIF_FLAGS(%rbp)
+ jne .Lret_struct2b
+
+ mov RVALUE(%rbp), %rcx
+ mov %eax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct2b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_2B, CIF_FLAGS(%rbp)
+ jne .Lret_struct1b
+
+ mov RVALUE(%rbp), %rcx
+ mov %ax, (%rcx)
+ jmp .Lret_void
+
+.Lret_struct1b:
+ cmpl $FFI_TYPE_SMALL_STRUCT_1B, CIF_FLAGS(%rbp)
+ jne .Lret_uint8
+
+ mov RVALUE(%rbp), %rcx
+ mov %al, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint8:
+ cmpl $FFI_TYPE_UINT8, CIF_FLAGS(%rbp)
+ jne .Lret_sint8
+
+ mov RVALUE(%rbp), %rcx
+ movzbq %al, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint8:
+ cmpl $FFI_TYPE_SINT8, CIF_FLAGS(%rbp)
+ jne .Lret_uint16
+
+ mov RVALUE(%rbp), %rcx
+ movsbq %al, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint16:
+ cmpl $FFI_TYPE_UINT16, CIF_FLAGS(%rbp)
+ jne .Lret_sint16
+
+ mov RVALUE(%rbp), %rcx
+ movzwq %ax, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint16:
+ cmpl $FFI_TYPE_SINT16, CIF_FLAGS(%rbp)
+ jne .Lret_uint32
+
+ mov RVALUE(%rbp), %rcx
+ movswq %ax, %rax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_uint32:
+ cmpl $FFI_TYPE_UINT32, CIF_FLAGS(%rbp)
+ jne .Lret_sint32
+
+ mov RVALUE(%rbp), %rcx
+ movl %eax, %eax
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_sint32:
+ cmpl $FFI_TYPE_SINT32, CIF_FLAGS(%rbp)
+ jne .Lret_float
+
+ mov RVALUE(%rbp), %rcx
+ cltq
+ movq %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_float:
+ cmpl $FFI_TYPE_FLOAT, CIF_FLAGS(%rbp)
+ jne .Lret_double
+
+ mov RVALUE(%rbp), %rax
+ movss %xmm0, (%rax)
+ jmp .Lret_void
+
+.Lret_double:
+ cmpl $FFI_TYPE_DOUBLE, CIF_FLAGS(%rbp)
+ jne .Lret_sint64
+
+ mov RVALUE(%rbp), %rax
+ movlpd %xmm0, (%rax)
+ jmp .Lret_void
+
+.Lret_sint64:
+ cmpl $FFI_TYPE_SINT64, CIF_FLAGS(%rbp)
+ jne .Lret_void
+
+ mov RVALUE(%rbp), %rcx
+ mov %rax, (%rcx)
+ jmp .Lret_void
+
+.Lret_void:
+ xor %rax, %rax
+
+ lea 16(%rbp), %rsp
+ pop %rbp
+ retq
+.ffi_call_win64_end:
+#endif /* !_MSC_VER */
+