summaryrefslogtreecommitdiff
path: root/src/vm/arm64
diff options
context:
space:
mode:
author    Fadi Hanna <fadim@microsoft.com>    2019-04-01 12:07:47 -0700
committer GitHub <noreply@github.com>         2019-04-01 12:07:47 -0700
commit    bc9248cad132fa01dd2b641b6b22849bc7a05457 (patch)
tree      5d1ee71059353a66004fc7a4d2501a7452db849f /src/vm/arm64
parent    ff43a803a814eaaa5eba02cafa4a91def3e4c7be (diff)
downloadcoreclr-bc9248cad132fa01dd2b641b6b22849bc7a05457.tar.gz
coreclr-bc9248cad132fa01dd2b641b6b22849bc7a05457.tar.bz2
coreclr-bc9248cad132fa01dd2b641b6b22849bc7a05457.zip
Enable R2R compilation/inlining of PInvoke stubs where no marshalling is required (#22560)
* These changes enable the inlining of some PInvokes that do not require any marshalling. With inlined pinvokes, R2R performance should become slightly better, since we'll avoid jitting some of the pinvoke IL stubs that we jit today for S.P.CoreLib. Performance gains not yet measured.
* Added JIT_PInvokeBegin/End helpers for all architectures. Linux stubs not yet implemented.
* Add INLINE_GETTHREAD for arm/arm64.
* Set CORJIT_FLAG_USE_PINVOKE_HELPERS jit flag for ReadyToRun compilations.
* Updating R2RDump tool to handle pinvokes.
Diffstat (limited to 'src/vm/arm64')
-rw-r--r--  src/vm/arm64/PInvokeStubs.asm  |  87
-rw-r--r--  src/vm/arm64/asmconstants.h    |  17
-rw-r--r--  src/vm/arm64/asmmacros.h       |  48
-rw-r--r--  src/vm/arm64/pinvokestubs.S    |  24
4 files changed, 176 insertions, 0 deletions
diff --git a/src/vm/arm64/PInvokeStubs.asm b/src/vm/arm64/PInvokeStubs.asm
index 440af92b6b..87cb77f7a0 100644
--- a/src/vm/arm64/PInvokeStubs.asm
+++ b/src/vm/arm64/PInvokeStubs.asm
@@ -16,6 +16,13 @@
IMPORT VarargPInvokeStubWorker
IMPORT GenericPInvokeCalliStubWorker
+ IMPORT JIT_PInvokeEndRarePath
+
+ IMPORT s_gsCookie
+ IMPORT g_TrapReturningThreads
+
+ SETALIAS InlinedCallFrame_vftable, ??_7InlinedCallFrame@@6B@
+ IMPORT $InlinedCallFrame_vftable
; ------------------------------------------------------------------
@@ -107,9 +114,89 @@ __PInvokeStubWorkerName SETS "$FuncPrefix":CC:"StubWorker"
MEND
+
TEXTAREA
; ------------------------------------------------------------------
+; JIT_PInvokeBegin helper
+;
+; in:
+; x0 = InlinedCallFrame*
+;
+ LEAF_ENTRY JIT_PInvokeBegin
+
+ ldr x9, =s_gsCookie
+ ldr x9, [x9]
+ str x9, [x0]
+ add x10, x0, SIZEOF__GSCookie
+
+ ;; set first slot to the value of InlinedCallFrame::`vftable' (checked by runtime code)
+ ldr x9, =$InlinedCallFrame_vftable
+ str x9, [x10]
+
+ str xzr, [x10, #InlinedCallFrame__m_Datum]
+
+ mov x9, sp
+ str x9, [x10, #InlinedCallFrame__m_pCallSiteSP]
+ str fp, [x10, #InlinedCallFrame__m_pCalleeSavedFP]
+ str lr, [x10, #InlinedCallFrame__m_pCallerReturnAddress]
+
+ ;; x0 = GetThread(), TRASHES x9
+ INLINE_GETTHREAD x0, x9
+
+ ;; pFrame->m_Next = pThread->m_pFrame;
+ ldr x9, [x0, #Thread_m_pFrame]
+ str x9, [x10, #Frame__m_Next]
+
+ ;; pThread->m_pFrame = pFrame;
+ str x10, [x0, #Thread_m_pFrame]
+
+ ;; pThread->m_fPreemptiveGCDisabled = 0
+ str wzr, [x0, #Thread_m_fPreemptiveGCDisabled]
+
+ ret
+
+ LEAF_END
+
+; ------------------------------------------------------------------
+; JIT_PInvokeEnd helper
+;
+; in:
+; x0 = InlinedCallFrame*
+;
+ LEAF_ENTRY JIT_PInvokeEnd
+
+ add x0, x0, SIZEOF__GSCookie
+
+ ;; x1 = GetThread(), TRASHES x2
+ INLINE_GETTHREAD x1, x2
+
+ ;; x0 = pFrame
+ ;; x1 = pThread
+
+ ;; pThread->m_fPreemptiveGCDisabled = 1
+ mov x9, 1
+ str w9, [x1, #Thread_m_fPreemptiveGCDisabled]
+
+ ;; Check return trap
+ ldr x9, =g_TrapReturningThreads
+ ldr x9, [x9]
+ cbnz x9, RarePath
+
+ ;; pThread->m_pFrame = pFrame->m_Next
+ ldr x9, [x0, #Frame__m_Next]
+ str x9, [x1, #Thread_m_pFrame]
+
+ ret
+
+RarePath
+ b JIT_PInvokeEndRarePath
+
+ LEAF_END
+
+ INLINE_GETTHREAD_CONSTANT_POOL
+
+; ------------------------------------------------------------------
; VarargPInvokeStub & VarargPInvokeGenILStub
;
; in:
diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h
index 1acc1b46d7..d2df47bab8 100644
--- a/src/vm/arm64/asmconstants.h
+++ b/src/vm/arm64/asmconstants.h
@@ -210,5 +210,22 @@ ASMCONSTANTS_C_ASSERT(DomainLocalModule__m_pDataBlob == offsetof(DomainLocalModu
ASMCONSTANTS_C_ASSERT(DomainLocalModule__m_pGCStatics == offsetof(DomainLocalModule, m_pGCStatics));
+// For JIT_PInvokeBegin and JIT_PInvokeEnd helpers
+#define Frame__m_Next 0x08
+ASMCONSTANTS_C_ASSERT(Frame__m_Next == offsetof(Frame, m_Next))
+
+#define InlinedCallFrame__m_Datum 0x10
+ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_Datum == offsetof(InlinedCallFrame, m_Datum))
+
+#define InlinedCallFrame__m_pCallSiteSP 0x20
+ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pCallSiteSP == offsetof(InlinedCallFrame, m_pCallSiteSP))
+
+#define InlinedCallFrame__m_pCallerReturnAddress 0x28
+ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pCallerReturnAddress == offsetof(InlinedCallFrame, m_pCallerReturnAddress))
+
+#define InlinedCallFrame__m_pCalleeSavedFP 0x30
+ASMCONSTANTS_C_ASSERT(InlinedCallFrame__m_pCalleeSavedFP == offsetof(InlinedCallFrame, m_pCalleeSavedFP))
+
+
#undef ASMCONSTANTS_RUNTIME_ASSERT
#undef ASMCONSTANTS_C_ASSERT
diff --git a/src/vm/arm64/asmmacros.h b/src/vm/arm64/asmmacros.h
index 5c6195b405..f7c83a60f0 100644
--- a/src/vm/arm64/asmmacros.h
+++ b/src/vm/arm64/asmmacros.h
@@ -299,3 +299,51 @@ $__RedirectionStubEndFuncName
MEND
+;-----------------------------------------------------------------------------
+; Macro to get a pointer to the Thread* object for the currently executing thread
+;
+__tls_array equ 0x58 ;; offsetof(TEB, ThreadLocalStoragePointer)
+
+ EXTERN _tls_index
+
+ GBLS __SECTIONREL_gCurrentThreadInfo
+__SECTIONREL_gCurrentThreadInfo SETS "SECTIONREL_gCurrentThreadInfo"
+
+ MACRO
+ INLINE_GETTHREAD $destReg, $trashReg
+
+ ;; The following macro variables are just some assembler magic to get the name of the 32-bit version
+ ;; of $trashReg. It does it by string manipulation. Replaces something like x3 with w3.
+ LCLS TrashRegister32Bit
+TrashRegister32Bit SETS "$trashReg"
+TrashRegister32Bit SETS "w":CC:("$TrashRegister32Bit":RIGHT:((:LEN:TrashRegister32Bit) - 1))
+
+ ldr $trashReg, =_tls_index
+ ldr $TrashRegister32Bit, [$trashReg]
+ ldr $destReg, [xpr, #__tls_array]
+ ldr $destReg, [$destReg, $trashReg lsl #3]
+ ldr $trashReg, =$__SECTIONREL_gCurrentThreadInfo
+ ldr $trashReg, [$trashReg]
+ ldr $destReg, [$destReg, $trashReg] ; return gCurrentThreadInfo.m_pThread
+ MEND
+
+;-----------------------------------------------------------------------------
+; INLINE_GETTHREAD_CONSTANT_POOL macro has to be used after the last function in the .asm file that used
+; INLINE_GETTHREAD. Optionally, it can be also used after any function that used INLINE_GETTHREAD
+; to improve density, or to reduce distance between the constant pool and its use.
+;
+
+ MACRO
+ INLINE_GETTHREAD_CONSTANT_POOL
+
+ EXTERN gCurrentThreadInfo
+
+ ;; Section relocs are 32 bits. Using an extra DCD initialized to zero for 8-byte alignment.
+$__SECTIONREL_gCurrentThreadInfo
+ DCD gCurrentThreadInfo
+ RELOC 8, gCurrentThreadInfo ;; SECREL
+ DCD 0
+
+__SECTIONREL_gCurrentThreadInfo SETS "$__SECTIONREL_gCurrentThreadInfo":CC:"_"
+
+ MEND
diff --git a/src/vm/arm64/pinvokestubs.S b/src/vm/arm64/pinvokestubs.S
index ad64db855a..d0fd3958ea 100644
--- a/src/vm/arm64/pinvokestubs.S
+++ b/src/vm/arm64/pinvokestubs.S
@@ -87,6 +87,30 @@ LOCAL_LABEL(\__PInvokeStubFuncName\()_0):
.endm
// ------------------------------------------------------------------
+// IN:
+// InlinedCallFrame (x0) = pointer to the InlinedCallFrame data, including the GS cookie slot (GS cookie right
+// before actual InlinedCallFrame data)
+//
+//
+ LEAF_ENTRY JIT_PInvokeBegin, _TEXT
+ // Not yet supported
+ EMIT_BREAKPOINT
+ ret lr
+ LEAF_END JIT_PInvokeBegin, _TEXT
+
+// ------------------------------------------------------------------
+// IN:
+// InlinedCallFrame (x0) = pointer to the InlinedCallFrame data, including the GS cookie slot (GS cookie right
+// before actual InlinedCallFrame data)
+//
+//
+ LEAF_ENTRY JIT_PInvokeEnd, _TEXT
+ // Not yet supported
+ EMIT_BREAKPOINT
+ ret lr
+ LEAF_END JIT_PInvokeEnd, _TEXT
+
+// ------------------------------------------------------------------
// VarargPInvokeStub & VarargPInvokeGenILStub
//
// in: