Diffstat (limited to 'src/vm/amd64/UMThunkStub.asm')
-rw-r--r-- | src/vm/amd64/UMThunkStub.asm | 618
1 file changed, 618 insertions, 0 deletions
diff --git a/src/vm/amd64/UMThunkStub.asm b/src/vm/amd64/UMThunkStub.asm
new file mode 100644
index 0000000000..ad3f17c854
--- /dev/null
+++ b/src/vm/amd64/UMThunkStub.asm
@@ -0,0 +1,618 @@
+; Licensed to the .NET Foundation under one or more agreements.
+; The .NET Foundation licenses this file to you under the MIT license.
+; See the LICENSE file in the project root for more information.
+
+; ==++==
+;
+
+;
+; ==--==
+
+include <AsmMacros.inc>
+include AsmConstants.inc
+
+ifdef FEATURE_MIXEDMODE
+IJWNOADThunk__MakeCall equ ?MakeCall@IJWNOADThunk@@KAXXZ
+IJWNOADThunk__FindThunkTarget equ ?FindThunkTarget@IJWNOADThunk@@QEAAPEBXXZ
+endif
+gfHostConfig equ ?g_fHostConfig@@3KA
+NDirect__IsHostHookEnabled equ ?IsHostHookEnabled@NDirect@@SAHXZ
+
+extern CreateThreadBlockThrow:proc
+extern TheUMEntryPrestubWorker:proc
+ifdef FEATURE_MIXEDMODE
+extern IJWNOADThunk__FindThunkTarget:proc
+endif
+extern UMEntryPrestubUnwindFrameChainHandler:proc
+extern UMThunkStubUnwindFrameChainHandler:proc
+extern g_TrapReturningThreads:dword
+extern UM2MDoADCallBack:proc
+extern ReverseEnterRuntimeHelper:proc
+extern ReverseLeaveRuntimeHelper:proc
+ifdef FEATURE_INCLUDE_ALL_INTERFACES
+extern gfHostConfig:dword
+extern NDirect__IsHostHookEnabled:proc
+endif
+extern UMThunkStubRareDisableWorker:proc
+extern ReversePInvokeBadTransition:proc
+
+;
+; METHODDESC_REGISTER: UMEntryThunk*
+;
+NESTED_ENTRY TheUMEntryPrestub, _TEXT, UMEntryPrestubUnwindFrameChainHandler
+
+TheUMEntryPrestub_STACK_FRAME_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
+
+; XMM save area
+TheUMEntryPrestub_XMM_SAVE_OFFSET = TheUMEntryPrestub_STACK_FRAME_SIZE
+TheUMEntryPrestub_STACK_FRAME_SIZE = TheUMEntryPrestub_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL
+
+; Ensure that the new rsp will be 16-byte aligned.  Note that the caller has
+; already pushed the return address.
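+; (Worked example, assuming the usual AsmConstants.inc values of 20h for
+; SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES and 40h for SIZEOF_MAX_FP_ARG_SPILL:
+; the frame is then 60h bytes and (60h + 8) MOD 16 = 8, so the check below
+; pads it to 68h.  On entry rsp is 8 mod 16 because of the pushed return
+; address, so rsp - 68h is 16-byte aligned again.)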
+if ((TheUMEntryPrestub_STACK_FRAME_SIZE + 8) MOD 16) ne 0
+TheUMEntryPrestub_STACK_FRAME_SIZE = TheUMEntryPrestub_STACK_FRAME_SIZE + 8
+endif
+
+        alloc_stack     TheUMEntryPrestub_STACK_FRAME_SIZE
+
+        save_reg_postrsp    rcx, TheUMEntryPrestub_STACK_FRAME_SIZE +  8h
+        save_reg_postrsp    rdx, TheUMEntryPrestub_STACK_FRAME_SIZE + 10h
+        save_reg_postrsp    r8,  TheUMEntryPrestub_STACK_FRAME_SIZE + 18h
+        save_reg_postrsp    r9,  TheUMEntryPrestub_STACK_FRAME_SIZE + 20h
+
+        save_xmm128_postrsp xmm0, TheUMEntryPrestub_XMM_SAVE_OFFSET
+        save_xmm128_postrsp xmm1, TheUMEntryPrestub_XMM_SAVE_OFFSET + 10h
+        save_xmm128_postrsp xmm2, TheUMEntryPrestub_XMM_SAVE_OFFSET + 20h
+        save_xmm128_postrsp xmm3, TheUMEntryPrestub_XMM_SAVE_OFFSET + 30h
+
+        END_PROLOGUE
+
+        ;
+        ; Do prestub-specific stuff
+        ;
+        mov             rcx, METHODDESC_REGISTER
+        call            TheUMEntryPrestubWorker
+
+        ;
+        ; we're going to tail call to the exec stub that we just set up
+        ;
+
+        mov             rcx, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE +  8h]
+        mov             rdx, [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 10h]
+        mov             r8,  [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 18h]
+        mov             r9,  [rsp + TheUMEntryPrestub_STACK_FRAME_SIZE + 20h]
+
+        movdqa          xmm0, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET]
+        movdqa          xmm1, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rsp + TheUMEntryPrestub_XMM_SAVE_OFFSET + 30h]
+
+        ;
+        ; epilogue
+        ;
+        add             rsp, TheUMEntryPrestub_STACK_FRAME_SIZE
+        TAILJMP_RAX
+
+NESTED_END TheUMEntryPrestub, _TEXT
+
+
+;
+; METHODDESC_REGISTER: UMEntryThunk*
+;
+NESTED_ENTRY UMThunkStub, _TEXT, UMThunkStubUnwindFrameChainHandler
+
+UMThunkStubAMD64_STACK_FRAME_SIZE = 0
+
+; number of integer registers saved in prologue
+UMThunkStubAMD64_NUM_REG_PUSHES = 2
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
+
+; rare path spill area
+UMThunkStubAMD64_RARE_PATH_SPILL_SIZE = 10h
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_RARE_PATH_SPILL_SIZE
+UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
+
+
+
+; HOST_NOTIFY_FLAG
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
+UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
+
+; XMM save area
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + SIZEOF_MAX_FP_ARG_SPILL
+
+; Ensure that the offset of the XMM save area will be 16-byte aligned.
+if ((UMThunkStubAMD64_STACK_FRAME_SIZE + 8) MOD 16) ne 0        ; +8 for caller-pushed return address
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + 8
+endif
+
+UMThunkStubAMD64_XMM_SAVE_NEGOFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE
+
+; Add in the callee scratch area size.
+UMThunkStubAMD64_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
+UMThunkStubAMD64_STACK_FRAME_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
+
+; Now we have the full size of the stack frame.  The offsets have been computed relative to the
+; top, so negate them to make them relative to the post-prologue rsp.
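+; (Note: each _NEGOFFSET above is a distance measured down from the top of the
+; frame, i.e. from the rsp at entry.  FRAME_SIZE - FRAME_OFFSET - NEGOFFSET
+; rebases such a distance as an rbp-relative offset, because set_frame below
+; establishes rbp at FRAME_OFFSET bytes above the post-prologue rsp.)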
+UMThunkStubAMD64_FRAME_OFFSET = UMThunkStubAMD64_CALLEE_SCRATCH_SIZE
+UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_RARE_PATH_SPILL_NEGOFFSET
+UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_NEGOFFSET
+UMThunkStubAMD64_XMM_SAVE_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE - UMThunkStubAMD64_FRAME_OFFSET - UMThunkStubAMD64_XMM_SAVE_NEGOFFSET
+UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET = UMThunkStubAMD64_STACK_FRAME_SIZE + 8 - UMThunkStubAMD64_FRAME_OFFSET       ; +8 for return address
+UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE = UMThunkStubAMD64_STACK_FRAME_SIZE - (UMThunkStubAMD64_NUM_REG_PUSHES * 8)
+
+.errnz UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET - UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET, update UMTHUNKSTUB_HOST_NOTIFY_FLAG_RBPOFFSET
+
+
+;
+; [ callee scratch ]                            <-- new RSP
+; [ callee scratch ]
+; [ callee scratch ]
+; [ callee scratch ]
+; {optional stack args passed to callee}
+; xmm0                                          <-- RBP
+; xmm1
+; xmm2
+; xmm3
+; {optional padding to align xmm regs}
+; HOST_NOTIFY_FLAG (set when the host must be notified via ReverseLeaveRuntime on the way out)
+; [rare path spill area]
+; [rare path spill area]
+; rbp save
+; r12 save
+; return address                                <-- entry RSP
+; [rcx home]
+; [rdx home]
+; [r8 home]
+; [r9 home]
+; stack arg 0
+; stack arg 1
+; ...
+
+        push_nonvol_reg r12
+        push_nonvol_reg rbp                                                     ; stack_args
+        alloc_stack     UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE
+        set_frame       rbp, UMThunkStubAMD64_FRAME_OFFSET                      ; stack_args
+        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0 ; hosted
+        END_PROLOGUE
+
+        ;
+        ; Call GetThread()
+        ;
+        CALL_GETTHREAD                  ; will not trash r10
+        test            rax, rax
+        jz              DoThreadSetup
+
+HaveThread:
+
+        mov             r12, rax        ; r12 <- Thread*
+
+        ; FailFast if a native-callable method is invoked via ldftn and calli.
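+        ; (If m_fPreemptiveGCDisabled is already 1, the thread is still in
+        ; cooperative mode: this entry point was reached straight from managed
+        ; code without a managed-to-native transition, which is not a legal
+        ; way to call a native-callable method, so we failfast.)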
+        cmp             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
+        jz              InvalidTransition
+
+        ;
+        ; disable preemptive GC
+        ;
+        mov             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 1
+
+        ;
+        ; catch returning thread here if a GC is in progress
+        ;
+        cmp             [g_TrapReturningThreads], 0
+        jnz             DoTrapReturningThreadsTHROW
+
+InCooperativeMode:
+
+ifdef FEATURE_INCLUDE_ALL_INTERFACES
+        test            [gfHostConfig], ASM_CLRTASKHOSTED       ; inlined NDirect::IsHostHookEnabled ; hosted
+ifdef _DEBUG
+        call            IsHostHookEnabledHelper
+        test            eax, eax
+endif ; _DEBUG
+        jnz             NotifyHost_ReverseEnterRuntime          ; hosted
+Done_NotifyHost_ReverseEnterRuntime:
+endif
+
+        mov             rax, [r12 + OFFSETOF__Thread__m_pDomain]
+        mov             eax, [rax + OFFSETOF__AppDomain__m_dwId]
+
+        mov             r11d, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_dwDomainId]
+
+        cmp             rax, r11
+        jne             WrongAppDomain
+
+        mov             r11, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
+        mov             eax, [r11 + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize]     ; stack_args
+        test            rax, rax                                                       ; stack_args
+        jnz             CopyStackArgs                                                  ; stack_args
+
+ArgumentsSetup:
+
+        mov             rax, [r11 + OFFSETOF__UMThunkMarshInfo__m_pILStub]             ; rax <- Stub*
+        call            rax
+
+PostCall:
+        ;
+        ; enable preemptive GC
+        ;
+        mov             dword ptr [r12 + OFFSETOF__Thread__m_fPreemptiveGCDisabled], 0
+
+ifdef FEATURE_INCLUDE_ALL_INTERFACES
+        cmp             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0   ; hosted
+        jnz             NotifyHost_ReverseLeaveRuntime                                 ; hosted
+Done_NotifyHost_ReverseLeaveRuntime:
+endif
+
+        ; epilog
+        lea             rsp, [rbp - UMThunkStubAMD64_FRAME_OFFSET + UMThunkStubAMD64_FIXED_STACK_ALLOC_SIZE]
+        pop             rbp             ; stack_args
+        pop             r12
+        ret
+
+
+DoThreadSetup:
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        ;            initial measurements indicate that this could be worth about a 5% savings in reverse
+        ;            pinvoke overhead.
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
+
+        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
+        call            CreateThreadBlockThrow
+        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+        mov             r9,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
+        movdqa          xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
+
+        jmp             HaveThread
+
+InvalidTransition:
+        ; ReversePInvokeBadTransition will failfast
+        call            ReversePInvokeBadTransition
+
+DoTrapReturningThreadsTHROW:
+
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        ;            initial measurements indicate that this could be worth about a 5% savings in reverse
+        ;            pinvoke overhead.
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
+
+        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
+        mov             rcx, r12                                ; Thread* pThread
+        mov             rdx, METHODDESC_REGISTER                ; UMEntryThunk* pUMEntry
+        call            UMThunkStubRareDisableWorker
+        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+        mov             r9,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
+        movdqa          xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
+
+        jmp             InCooperativeMode
+
+CopyStackArgs:
+        ; rax = cbStackArgs (with 20h for register args subtracted out already)
+
+        sub             rsp, rax
+        and             rsp, -16
+
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+
+        ; rax = number of bytes
+
+        lea             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
+        lea             rdx, [rsp + UMThunkStubAMD64_CALLEE_SCRATCH_SIZE]
+
+CopyLoop:
+        ; rax = number of bytes
+        ; rcx = src
+        ; rdx = dest
+        ; r8  = scratch
+
+        add             rax, -8         ; the movs below don't touch flags, so jnz tests this result
+        mov             r8, [rcx + rax]
+        mov             [rdx + rax], r8
+        jnz             CopyLoop
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+
+        jmp             ArgumentsSetup
+
+ifdef FEATURE_INCLUDE_ALL_INTERFACES
+NotifyHost_ReverseEnterRuntime:
+        mov             [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET], METHODDESC_REGISTER
+
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        ;            initial measurements indicate that this could be worth about a 5% savings in reverse
+        ;            pinvoke overhead.
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
+
+        mov             rcx, r12
+        call            ReverseEnterRuntimeHelper
+        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 1
+
+        mov             rcx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        mov             rdx, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h]
+        mov             r8,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+        mov             r9,  [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h]
+
+        ; @CONSIDER: mark UMEntryThunks that have FP params and only save/restore xmm regs on those calls
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
+        movdqa          xmm1, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h]
+
+        mov             METHODDESC_REGISTER, [rbp + UMThunkStubAMD64_RARE_PATH_SPILL_OFFSET]
+
+        jmp             Done_NotifyHost_ReverseEnterRuntime
+
+NotifyHost_ReverseLeaveRuntime:
+
+        ; save rax, xmm0
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rax
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
+
+        mov             rcx, r12
+        call            ReverseLeaveRuntimeHelper
+        mov             byte ptr [rbp + UMThunkStubAMD64_HOST_NOTIFY_FLAG_OFFSET], 0
+
+        ; restore rax, xmm0
+        mov             rax, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
+
+        jmp             Done_NotifyHost_ReverseLeaveRuntime
+endif
+
+WrongAppDomain:
+        ;
+        ; home register args to the stack
+        ;
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h], rcx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  8h], rdx
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h], r8
+        mov             [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 18h], r9
+
+        ;
+        ; save off xmm registers
+        ;
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h], xmm0
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 10h], xmm1
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 20h], xmm2
+        movdqa          xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET + 30h], xmm3
+
+        ;
+        ; call our helper to perform the AD transition
+        ;
+        mov             rcx, METHODDESC_REGISTER
+        lea             r8, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET]
+        mov             rax, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]
+        mov             r9d, [rax + OFFSETOF__UMThunkMarshInfo__m_cbActualArgSize]
+        call            UM2MDoADCallBack
+
+        ; restore return value
+        mov             rax, [rbp + UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        movdqa          xmm0, xmmword ptr [rbp + UMThunkStubAMD64_XMM_SAVE_OFFSET +  0h]
+
+        jmp             PostCall
+
+NESTED_END UMThunkStub, _TEXT
+
+;
+; EXTERN_C void __stdcall UM2MThunk_WrapperHelper(
+;       void *pThunkArgs,             ; rcx
+;       int argLen,                   ; rdx
+;       void *pAddr,                  ; r8            // not used
+;       UMEntryThunk *pEntryThunk,    ; r9
+;       Thread *pThread);             ; [entry_sp + 28h]
+;
+NESTED_ENTRY UM2MThunk_WrapperHelper, _TEXT
+
+
+UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = 0
+
+; number of integer registers saved in prologue
+UM2MThunk_WrapperHelper_NUM_REG_PUSHES = 3
+UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + (UM2MThunk_WrapperHelper_NUM_REG_PUSHES * 8)
+
+UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE = SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES
+UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE
+
+; Ensure that rsp remains 16-byte aligned
+if ((UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + 8) MOD 16) ne 0        ; +8 for caller-pushed return address
+UM2MThunk_WrapperHelper_STACK_FRAME_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE + 8
+endif
+
+UM2MThunk_WrapperHelper_FRAME_OFFSET = UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE
+UM2MThunk_WrapperHelper_FIXED_STACK_ALLOC_SIZE = UM2MThunk_WrapperHelper_STACK_FRAME_SIZE - (UM2MThunk_WrapperHelper_NUM_REG_PUSHES * 8)
+
+        push_nonvol_reg rsi
+        push_nonvol_reg rdi
+        push_nonvol_reg rbp
+        alloc_stack     UM2MThunk_WrapperHelper_FIXED_STACK_ALLOC_SIZE
+        set_frame       rbp, UM2MThunk_WrapperHelper_FRAME_OFFSET
+        END_PROLOGUE
+
+        ;
+        ; We are in cooperative mode and in the correct domain.
+        ; The host has also been notified that we've entered the
+        ; runtime.  All we have left to do is to copy the stack,
+        ; set up the register args and then call the managed target.
+        ;
+
+        test            rdx, rdx
+        jg              CopyStackArgs
+
+ArgumentsSetup:
+        mov             METHODDESC_REGISTER, r9
+
+        mov             rsi, rcx        ; rsi <- pThunkArgs
+        mov             rcx, [rsi +  0h]
+        mov             rdx, [rsi +  8h]
+        mov             r8,  [rsi + 10h]
+        mov             r9,  [rsi + 18h]
+
+        movdqa          xmm0, xmmword ptr [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET +  0h]
+        movdqa          xmm1, xmmword ptr [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 10h]
+        movdqa          xmm2, xmmword ptr [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 20h]
+        movdqa          xmm3, xmmword ptr [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 30h]
+
+        mov             rax, [METHODDESC_REGISTER + OFFSETOF__UMEntryThunk__m_pUMThunkMarshInfo]  ; rax <- UMThunkMarshInfo*
+        mov             rax, [rax + OFFSETOF__UMThunkMarshInfo__m_pILStub]                        ; rax <- Stub*
+        call            rax
+
+        ; make sure we don't trash the return value
+        mov             [rsi + 0h], rax
+        movdqa          xmmword ptr [rsi + UMThunkStubAMD64_XMM_SAVE_OFFSET - UMThunkStubAMD64_ARGUMENTS_STACK_HOME_OFFSET + 0h], xmm0
+
+        lea             rsp, [rbp - UM2MThunk_WrapperHelper_FRAME_OFFSET + UM2MThunk_WrapperHelper_FIXED_STACK_ALLOC_SIZE]
+        pop             rbp
+        pop             rdi
+        pop             rsi
+        ret
+
+
+CopyStackArgs:
+        ; rdx = cbStackArgs (with 20h for register args subtracted out already)
+        ; rcx = pSrcArgStack
+
+        sub             rsp, rdx
+        and             rsp, -16
+
+        mov             r8, rcx
+
+        lea             rsi, [rcx + SIZEOF_MAX_OUTGOING_ARGUMENT_HOMES]
+        lea             rdi, [rsp + UM2MThunk_WrapperHelper_CALLEE_SCRATCH_SIZE]
+
+        mov             rcx, rdx
+        shr             rcx, 3          ; byte count -> qword count for rep movsq
+
+        rep movsq
+
+        mov             rcx, r8
+
+        jmp             ArgumentsSetup
+
+NESTED_END UM2MThunk_WrapperHelper, _TEXT
+
+ifdef _DEBUG
+ifdef FEATURE_INCLUDE_ALL_INTERFACES
+
+NESTED_ENTRY IsHostHookEnabledHelper, _TEXT
+
+        push_nonvol_reg rcx
+        push_nonvol_reg rdx
+        push_nonvol_reg r8
+        push_nonvol_reg r9
+        push_nonvol_reg r10
+
+IsHostHookEnabledHelper_FIXED_STACK_ALLOC_SIZE = 20h + 40h
+
+        alloc_stack     IsHostHookEnabledHelper_FIXED_STACK_ALLOC_SIZE
+
+        END_PROLOGUE
+
+        movdqa          xmmword ptr [rsp + 20h +  0h], xmm0
+        movdqa          xmmword ptr [rsp + 20h + 10h], xmm1
+        movdqa          xmmword ptr [rsp + 20h + 20h], xmm2
+        movdqa          xmmword ptr [rsp + 20h + 30h], xmm3
+
+        call            NDirect__IsHostHookEnabled
+
+        movdqa          xmm0, xmmword ptr [rsp + 20h +  0h]
+        movdqa          xmm1, xmmword ptr [rsp + 20h + 10h]
+        movdqa          xmm2, xmmword ptr [rsp + 20h + 20h]
+        movdqa          xmm3, xmmword ptr [rsp + 20h + 30h]
+
+        ; epilog
+        add             rsp, IsHostHookEnabledHelper_FIXED_STACK_ALLOC_SIZE
+        pop             r10
+        pop             r9
+        pop             r8
+        pop             rdx
+        pop             rcx
+        ret
+NESTED_END IsHostHookEnabledHelper, _TEXT
+
+endif ; FEATURE_INCLUDE_ALL_INTERFACES
+endif ; _DEBUG
+
+ifdef FEATURE_MIXEDMODE
+NESTED_ENTRY IJWNOADThunk__MakeCall, _TEXT
+        ; METHODDESC_REGISTER = IJWNOADThunk*
+
+        alloc_stack     68h
+
+        save_reg_postrsp    rcx, 70h
+        save_reg_postrsp    rdx, 78h
+        save_reg_postrsp    r8,  80h
+        save_reg_postrsp    r9,  88h
+
+        save_xmm128_postrsp xmm0, 20h
+        save_xmm128_postrsp xmm1, 30h
+        save_xmm128_postrsp xmm2, 40h
+        save_xmm128_postrsp xmm3, 50h
+        END_PROLOGUE
+
+        mov             rcx, METHODDESC_REGISTER
+        call            IJWNOADThunk__FindThunkTarget
+
+        movdqa          xmm0, xmmword ptr [rsp + 20h]
+        movdqa          xmm1, xmmword ptr [rsp + 30h]
+        movdqa          xmm2, xmmword ptr [rsp + 40h]
+        movdqa          xmm3, xmmword ptr [rsp + 50h]
+
+        mov             rcx, [rsp + 70h]
+        mov             rdx, [rsp + 78h]
+        mov             r8,  [rsp + 80h]
+        mov             r9,  [rsp + 88h]
+
+        ; The target is in rax
+        add             rsp, 68h
+        TAILJMP_RAX
+NESTED_END IJWNOADThunk__MakeCall, _TEXT
+endif ; FEATURE_MIXEDMODE
+
+        end
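
For orientation, the control flow that UMThunkStub implements can be read as the
following C++-style sketch. The struct layouts and helper signatures are stand-ins
inferred from the offsets and calls in the assembly above (the real declarations
live elsewhere in the runtime), so treat this as illustrative pseudocode rather
than runtime source:

    // Hedged sketch of UMThunkStub's flow; every declaration here is an
    // assumption reconstructed from the assembly, not a real runtime header.
    // (Host-notification paths under FEATURE_INCLUDE_ALL_INTERFACES omitted.)
    struct UMThunkMarshInfo { void* (*m_pILStub)(void*); unsigned m_cbActualArgSize; };
    struct UMEntryThunk     { UMThunkMarshInfo* m_pUMThunkMarshInfo; unsigned m_dwDomainId; };
    struct AppDomain        { unsigned m_dwId; };
    struct Thread           { unsigned m_fPreemptiveGCDisabled; AppDomain* m_pDomain; };

    Thread*  GetThread();
    Thread*  CreateThreadBlockThrow();
    void     ReversePInvokeBadTransition();                  // failfasts, never returns
    void     UMThunkStubRareDisableWorker(Thread*, UMEntryThunk*);
    void*    UM2MDoADCallBack(UMEntryThunk*, void* pArgs, unsigned cbArgs);
    extern unsigned g_TrapReturningThreads;

    void* UMThunkStub_Sketch(UMEntryThunk* thunk, void* pArgHomes)
    {
        Thread* pThread = GetThread();
        if (!pThread)
            pThread = CreateThreadBlockThrow();              // DoThreadSetup

        if (pThread->m_fPreemptiveGCDisabled)                // InvalidTransition
            ReversePInvokeBadTransition();

        pThread->m_fPreemptiveGCDisabled = 1;                // enter cooperative mode
        if (g_TrapReturningThreads)                          // DoTrapReturningThreadsTHROW
            UMThunkStubRareDisableWorker(pThread, thunk);

        UMThunkMarshInfo* pInfo = thunk->m_pUMThunkMarshInfo;
        void* result;
        if (pThread->m_pDomain->m_dwId != thunk->m_dwDomainId)   // WrongAppDomain
            result = UM2MDoADCallBack(thunk, pArgHomes, pInfo->m_cbActualArgSize);
        else
            result = pInfo->m_pILStub(pArgHomes);            // ArgumentsSetup

        pThread->m_fPreemptiveGCDisabled = 0;                // PostCall: back to preemptive
        return result;
    }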