Diffstat (limited to 'src/vm/amd64/cgenamd64.cpp')
-rw-r--r--  src/vm/amd64/cgenamd64.cpp  1278
1 file changed, 1278 insertions(+), 0 deletions(-)
diff --git a/src/vm/amd64/cgenamd64.cpp b/src/vm/amd64/cgenamd64.cpp
new file mode 100644
index 0000000000..51aac1ebc6
--- /dev/null
+++ b/src/vm/amd64/cgenamd64.cpp
@@ -0,0 +1,1278 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// Various helper routines for generating AMD64 assembly code.
+//
+
+// Precompiled Header
+
+#include "common.h"
+
+#include "stublink.h"
+#include "cgensys.h"
+#include "siginfo.hpp"
+#include "excep.h"
+#include "ecall.h"
+#include "dllimport.h"
+#include "dllimportcallback.h"
+#include "dbginterface.h"
+#include "fcall.h"
+#include "array.h"
+#include "virtualcallstub.h"
+#include "jitinterface.h"
+
+#ifdef FEATURE_COMINTEROP
+#include "clrtocomcall.h"
+#endif // FEATURE_COMINTEROP
+
+void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pRegs)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ T_CONTEXT * pContext = pRD->pCurrentContext;
+#define CALLEE_SAVED_REGISTER(regname) pContext->regname = pRegs->regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers;
+#define CALLEE_SAVED_REGISTER(regname) pContextPointers->regname = (PULONG64)&pRegs->regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+}
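+
+// Illustrative expansion (a sketch, not part of this change): for each register
+// named by ENUM_CALLEE_SAVED_REGISTERS -- Rbx, for example -- the first macro
+// application above produces an assignment into the current context, and the
+// second publishes the address of the saved slot:
+//
+//     pContext->Rbx = pRegs->Rbx;
+//     pContextPointers->Rbx = (PULONG64)&pRegs->Rbx;
+//
+// The exact register list depends on the platform definition of
+// ENUM_CALLEE_SAVED_REGISTERS.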
+
+void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers;
+ pContextPointers->Rax = NULL;
+#ifdef UNIX_AMD64_ABI
+ pContextPointers->Rsi = NULL;
+ pContextPointers->Rdi = NULL;
+#endif
+ pContextPointers->Rcx = NULL;
+ pContextPointers->Rdx = NULL;
+ pContextPointers->R8 = NULL;
+ pContextPointers->R9 = NULL;
+ pContextPointers->R10 = NULL;
+ pContextPointers->R11 = NULL;
+}
+
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ pRD->pCurrentContext->Rip = GetReturnAddress();
+ pRD->pCurrentContext->Rsp = GetSP();
+
+ UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters());
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+#ifndef DACCESS_COMPILE
+
+extern "C" TADDR s_pStubHelperFrameVPtr;
+TADDR s_pStubHelperFrameVPtr = StubHelperFrame::GetMethodFrameVPtr();
+
+void TailCallFrame::InitFromContext(T_CONTEXT * pContext)
+{
+ WRAPPER_NO_CONTRACT;
+
+#define CALLEE_SAVED_REGISTER(regname) m_calleeSavedRegisters.regname = pContext->regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ m_pGCLayout = 0;
+ m_ReturnAddress = pContext->Rip;
+}
+
+#endif // !DACCESS_COMPILE
+
+void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ pRD->pCurrentContext->Rip = m_ReturnAddress;
+ pRD->pCurrentContext->Rsp = dac_cast<TADDR>(this) + sizeof(*this);
+
+ UpdateRegDisplayFromCalleeSavedRegisters(pRD, &m_calleeSavedRegisters);
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TailCallFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+#ifdef PROFILING_SUPPORTED
+ PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
+#endif
+ HOST_NOCALLS;
+ MODE_ANY;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ if (!InlinedCallFrame::FrameHasActiveCall(this))
+ {
+ LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this));
+ return;
+ }
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ pRD->pCurrentContext->Rip = *(DWORD64 *)&m_pCallerReturnAddress;
+ pRD->pCurrentContext->Rsp = *(DWORD64 *)&m_pCallSiteSP;
+ pRD->pCurrentContext->Rbp = *(DWORD64 *)&m_pCalleeSavedFP;
+
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = NULL;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ pRD->pCurrentContextPointers->Rbp = (DWORD64 *)&m_pCalleeSavedFP;
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK InlinedCallFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ PRECONDITION(m_MachState._pRetAddr == PTR_TADDR(&m_MachState.m_Rip));
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ //
+ // Copy the saved state from the frame to the current context.
+ //
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState.m_Rip, m_MachState.m_Rsp));
+
+#if defined(DACCESS_COMPILE)
+ // For DAC, we may get here when the HMF is still uninitialized.
+ // So we may need to unwind here.
+ if (!m_MachState.isValid())
+ {
+ // This allocation throws on OOM.
+ MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true);
+
+ InsureInit(false, pUnwoundState);
+
+ pRD->pCurrentContext->Rip = pRD->ControlPC = pUnwoundState->m_Rip;
+ pRD->pCurrentContext->Rsp = pRD->SP = pUnwoundState->m_Rsp;
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = pUnwoundState->m_Capture.regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = pUnwoundState->m_Ptrs.p##regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+ return;
+ }
+#endif // DACCESS_COMPILE
+
+ pRD->pCurrentContext->Rip = pRD->ControlPC = m_MachState.m_Rip;
+ pRD->pCurrentContext->Rsp = pRD->SP = m_MachState.m_Rsp;
+
+#ifdef FEATURE_PAL
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = (m_MachState.m_Ptrs.p##regname != NULL) ? \
+ *m_MachState.m_Ptrs.p##regname : m_MachState.m_Unwound.regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#else // FEATURE_PAL
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = *m_MachState.m_Ptrs.p##regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#endif // FEATURE_PAL
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = m_MachState.m_Ptrs.p##regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ //
+ // Clear all knowledge of scratch registers. We're skipping to an
+ // arbitrary point on the stack, and frames aren't required to preserve or
+ // keep track of these anyway.
+ //
+
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+}
+
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ memcpy(pRD->pCurrentContext, &m_ctx, sizeof(CONTEXT));
+
+ pRD->ControlPC = m_ctx.Rip;
+
+ pRD->SP = m_ctx.Rsp;
+
+ pRD->pCurrentContextPointers->Rax = &m_ctx.Rax;
+ pRD->pCurrentContextPointers->Rcx = &m_ctx.Rcx;
+ pRD->pCurrentContextPointers->Rdx = &m_ctx.Rdx;
+ pRD->pCurrentContextPointers->Rbx = &m_ctx.Rbx;
+ pRD->pCurrentContextPointers->Rbp = &m_ctx.Rbp;
+ pRD->pCurrentContextPointers->Rsi = &m_ctx.Rsi;
+ pRD->pCurrentContextPointers->Rdi = &m_ctx.Rdi;
+ pRD->pCurrentContextPointers->R8 = &m_ctx.R8;
+ pRD->pCurrentContextPointers->R9 = &m_ctx.R9;
+ pRD->pCurrentContextPointers->R10 = &m_ctx.R10;
+ pRD->pCurrentContextPointers->R11 = &m_ctx.R11;
+ pRD->pCurrentContextPointers->R12 = &m_ctx.R12;
+ pRD->pCurrentContextPointers->R13 = &m_ctx.R13;
+ pRD->pCurrentContextPointers->R14 = &m_ctx.R14;
+ pRD->pCurrentContextPointers->R15 = &m_ctx.R15;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+}
+
+#ifdef FEATURE_HIJACK
+TADDR ResumableFrame::GetReturnAddressPtr()
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+ return dac_cast<TADDR>(m_Regs) + offsetof(CONTEXT, Rip);
+}
+
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ CopyMemory(pRD->pCurrentContext, m_Regs, sizeof(CONTEXT));
+
+ pRD->ControlPC = m_Regs->Rip;
+
+ pRD->SP = m_Regs->Rsp;
+
+ pRD->pCurrentContextPointers->Rax = &m_Regs->Rax;
+ pRD->pCurrentContextPointers->Rcx = &m_Regs->Rcx;
+ pRD->pCurrentContextPointers->Rdx = &m_Regs->Rdx;
+ pRD->pCurrentContextPointers->Rbx = &m_Regs->Rbx;
+ pRD->pCurrentContextPointers->Rbp = &m_Regs->Rbp;
+ pRD->pCurrentContextPointers->Rsi = &m_Regs->Rsi;
+ pRD->pCurrentContextPointers->Rdi = &m_Regs->Rdi;
+ pRD->pCurrentContextPointers->R8 = &m_Regs->R8;
+ pRD->pCurrentContextPointers->R9 = &m_Regs->R9;
+ pRD->pCurrentContextPointers->R10 = &m_Regs->R10;
+ pRD->pCurrentContextPointers->R11 = &m_Regs->R11;
+ pRD->pCurrentContextPointers->R12 = &m_Regs->R12;
+ pRD->pCurrentContextPointers->R13 = &m_Regs->R13;
+ pRD->pCurrentContextPointers->R14 = &m_Regs->R14;
+ pRD->pCurrentContextPointers->R15 = &m_Regs->R15;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ RETURN;
+}
+
+// The HijackFrame has to know the registers that are pushed by OnHijackTripThread
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ pRD->pCurrentContext->Rip = m_ReturnAddress;
+ pRD->pCurrentContext->Rsp = PTR_TO_MEMBER_TADDR(HijackArgs, m_Args, Rip) + sizeof(void *);
+
+ UpdateRegDisplayFromCalleeSavedRegisters(pRD, &(m_Args->Regs));
+
+#ifdef UNIX_AMD64_ABI
+ pRD->pCurrentContextPointers->Rsi = NULL;
+ pRD->pCurrentContextPointers->Rdi = NULL;
+#endif
+ pRD->pCurrentContextPointers->Rcx = NULL;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->Rdx = (PULONG64)&m_Args->Rdx;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->Rdx = NULL;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->R8 = NULL;
+ pRD->pCurrentContextPointers->R9 = NULL;
+ pRD->pCurrentContextPointers->R10 = NULL;
+ pRD->pCurrentContextPointers->R11 = NULL;
+
+ pRD->pCurrentContextPointers->Rax = (PULONG64)&m_Args->Rax;
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+/*
+ // This only describes the top-most frame
+ pRD->pContext = NULL;
+
+
+ pRD->PCTAddr = dac_cast<TADDR>(m_Args) + offsetof(HijackArgs, Rip);
+ //pRD->pPC = PTR_SLOT(pRD->PCTAddr);
+ pRD->SP = (ULONG64)(pRD->PCTAddr + sizeof(TADDR));
+*/
+}
+#endif // FEATURE_HIJACK
+
+BOOL isJumpRel32(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ } CONTRACTL_END;
+
+ PTR_BYTE pbCode = PTR_BYTE(pCode);
+
+ return 0xE9 == pbCode[0];
+}
+
+//
+// Given a pBuffer that contains a jmp rel32 (as checked by isJumpRel32),
+// this method decodes the instruction and returns the jump target.
+//
+PCODE decodeJump32(PCODE pBuffer)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ // jmp rel32
+ _ASSERTE(isJumpRel32(pBuffer));
+
+ return rel32Decode(pBuffer+1);
+}
+
+BOOL isJumpRel64(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ } CONTRACTL_END;
+
+ PTR_BYTE pbCode = PTR_BYTE(pCode);
+
+ return 0x48 == pbCode[0] &&
+ 0xB8 == pbCode[1] &&
+ 0xFF == pbCode[10] &&
+ 0xE0 == pbCode[11];
+}
+
+PCODE decodeJump64(PCODE pBuffer)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ // mov rax, xxx
+ // jmp rax
+ _ASSERTE(isJumpRel64(pBuffer));
+
+ return *PTR_UINT64(pBuffer+2);
+}
+
+#ifdef DACCESS_COMPILE
+BOOL GetAnyThunkTarget (CONTEXT *pctx, TADDR *pTarget, TADDR *pTargetMethodDesc)
+{
+ TADDR pThunk = GetIP(pctx);
+
+ *pTargetMethodDesc = NULL;
+
+ //
+ // Check for something generated by emitJump.
+ //
+ if (isJumpRel64(pThunk))
+ {
+ *pTarget = decodeJump64(pThunk);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+#endif // DACCESS_COMPILE
+
+
+#ifndef DACCESS_COMPILE
+
+// Note: This is only used by server GC on Windows.
+//
+// This function returns the number of logical processors on a given physical chip. If it cannot
+// determine the number of logical CPUs, or if the machine is not populated uniformly with the same
+// type of processor, this function returns 1.
+
+extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
+
+// fix this if/when AMD does multicore or SMT
+DWORD GetLogicalCpuCount()
+{
+ // No CONTRACT possible because GetLogicalCpuCount uses SEH
+
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+
+ static DWORD val = 0;
+
+ // cache value for later re-use
+ if (val)
+ {
+ return val;
+ }
+
+ struct Param : DefaultCatchFilterParam
+ {
+ DWORD retVal;
+ } param;
+ param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
+ param.retVal = 1;
+
+ PAL_TRY(Param *, pParam, &param)
+ {
+
+ unsigned char buffer[16];
+ DWORD maxCpuId = getcpuid(0, buffer);
+ DWORD* dwBuffer = (DWORD*)buffer;
+
+ if (maxCpuId < 1)
+ goto qExit;
+
+ if (dwBuffer[1] == 'uneG') {
+ if (dwBuffer[3] == 'Ieni') {
+ if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T
+
+
+ // TODO: GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are currently broken on
+ // multi-core processors, but we never call into them in that case because we don't halve the
+ // gen0 size on Prescott and later processors. We keep the old logic here for earlier
+ // generation (Northwood-based) systems: perf data suggests that halving the gen0 size on those
+ // systems still boosts performance (e.g. BizTalk improves by about 17%), so we still halve
+ // gen0 size there, and GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() work fine
+ // for those earlier systems. For Prescott and later, or multi-core, processors, perf data
+ // suggests that not halving the gen0 size at all gives better overall performance.
+ // This is going to be fixed with a new version in the Orcas time frame.
+
+ if( (maxCpuId > 3) && (maxCpuId < 0x80000000) )
+ goto qExit;
+
+ val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
+ if (val )
+ {
+ pParam->retVal = val; // OS API HT enumeration successful, we are Done
+ goto qExit;
+ }
+
+ val = GetLogicalCpuCountFallback(); // Fallback to HT enumeration using CPUID
+ if( val )
+ pParam->retVal = val;
+ }
+ }
+ }
+qExit: ;
+ }
+
+ PAL_EXCEPT_FILTER(DefaultCatchFilter)
+ {
+ }
+ PAL_ENDTRY
+
+ if (val == 0)
+ {
+ val = param.retVal;
+ }
+
+ return param.retVal;
+}
+
+void EncodeLoadAndJumpThunk (LPBYTE pBuffer, LPVOID pv, LPVOID pTarget)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ MODE_ANY;
+
+ PRECONDITION(CheckPointer(pBuffer));
+ }
+ CONTRACTL_END;
+
+ // mov r10, pv 49 ba xx xx xx xx xx xx xx xx
+
+ pBuffer[0] = 0x49;
+ pBuffer[1] = 0xBA;
+
+ *((UINT64 UNALIGNED *)&pBuffer[2]) = (UINT64)pv;
+
+ // mov rax, pTarget 48 b8 xx xx xx xx xx xx xx xx
+
+ pBuffer[10] = 0x48;
+ pBuffer[11] = 0xB8;
+
+ *((UINT64 UNALIGNED *)&pBuffer[12]) = (UINT64)pTarget;
+
+ // jmp rax ff e0
+
+ pBuffer[20] = 0xFF;
+ pBuffer[21] = 0xE0;
+
+ _ASSERTE(DbgIsExecutable(pBuffer, 22));
+}
+
+void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
+{
+ CONTRACT_VOID
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ }
+ CONTRACT_END;
+
+ BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE;
+
+ // We need the target to be in a 64-bit aligned memory location and the call instruction
+ // to immediately precede the ComCallMethodDesc. We generate an indirect call to avoid
+ // consuming the 3 qwords that a "mov rax, target / nops / call rax" sequence would require.
+
+ // dq 123456789abcdef0h
+ // nop 90
+ // nop 90
+ // call [$ - 10] ff 15 f0 ff ff ff
+
+ *((UINT64 *)&pBuffer[COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET]) = (UINT64)target;
+
+ pBuffer[-2] = 0x90;
+ pBuffer[-1] = 0x90;
+
+ pBuffer[0] = 0xFF;
+ pBuffer[1] = 0x15;
+ *((UINT32 UNALIGNED *)&pBuffer[2]) = (UINT32)(COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET - COMMETHOD_CALL_PRESTUB_SIZE);
+
+ _ASSERTE(DbgIsExecutable(pBuffer, COMMETHOD_CALL_PRESTUB_SIZE));
+
+ RETURN;
+}
+
+void emitJump(LPBYTE pBuffer, LPVOID target)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ MODE_ANY;
+
+ PRECONDITION(CheckPointer(pBuffer));
+ }
+ CONTRACTL_END;
+
+ // mov rax, 123456789abcdef0h 48 b8 xx xx xx xx xx xx xx xx
+ // jmp rax ff e0
+
+ pBuffer[0] = 0x48;
+ pBuffer[1] = 0xB8;
+
+ *((UINT64 UNALIGNED *)&pBuffer[2]) = (UINT64)target;
+
+ pBuffer[10] = 0xFF;
+ pBuffer[11] = 0xE0;
+
+ _ASSERTE(DbgIsExecutable(pBuffer, 12));
+}
+
+void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ }
+ CONTRACTL_END;
+
+ // padding // CC CC CC CC
+ // mov r10, pUMEntryThunk // 49 ba xx xx xx xx xx xx xx xx // METHODDESC_REGISTER
+ // mov rax, pJmpDest // 48 b8 xx xx xx xx xx xx xx xx // need to ensure this imm64 is qword aligned
+ // TAILJMP_RAX // 48 FF E0
+
+#ifdef _DEBUG
+ m_padding[0] = X86_INSTR_INT3;
+ m_padding[1] = X86_INSTR_INT3;
+ m_padding[2] = X86_INSTR_INT3;
+ m_padding[3] = X86_INSTR_INT3;
+#endif // _DEBUG
+ m_movR10[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT;
+ m_movR10[1] = 0xBA;
+ m_uet = pvSecretParam;
+ m_movRAX[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ m_movRAX[1] = 0xB8;
+ m_execstub = pTargetCode;
+ m_jmpRAX[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT;
+ m_jmpRAX[1] = 0xFF;
+ m_jmpRAX[2] = 0xE0;
+
+ _ASSERTE(DbgIsExecutable(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0]));
+}
+
+UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ UMEntryThunkCode *pThunkCode = (UMEntryThunkCode*)((BYTE*)pCallback - UMEntryThunkCode::GetEntryPointOffset());
+
+ return (UMEntryThunk*)pThunkCode->m_uet;
+}
+
+INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMethod, LoaderAllocator *pLoaderAllocator /* = NULL */)
+{
+ CONTRACTL
+ {
+ THROWS; // Creating a JumpStub could throw OutOfMemory
+ GC_NOTRIGGER;
+
+ PRECONDITION(pMethod != NULL || pLoaderAllocator != NULL);
+ // If a loader allocator isn't explicitly provided, we must be able to get one via the MethodDesc.
+ PRECONDITION(pLoaderAllocator != NULL || pMethod->GetLoaderAllocator() != NULL);
+ // If both a loader allocator and a MethodDesc are provided, the MethodDesc must either not yet have its method
+ // table set up, or its loader allocator must match the one provided, unless we're in a compilation process
+ // (NGen loads assemblies as domain-bound but compiles them as domain neutral).
+ PRECONDITION(!pLoaderAllocator || !pMethod || pMethod->GetMethodDescChunk()->GetMethodTablePtr()->IsNull() ||
+ pLoaderAllocator == pMethod->GetMethodDescChunk()->GetFirstMethodDesc()->GetLoaderAllocatorForCode() || IsCompilationProcess());
+ }
+ CONTRACTL_END;
+
+ TADDR baseAddr = (TADDR)pRel32 + 4;
+
+ INT_PTR offset = target - baseAddr;
+
+ if (!FitsInI4(offset) INDEBUG(|| PEDecoder::GetForceRelocs()))
+ {
+ TADDR loAddr = baseAddr + INT32_MIN;
+ if (loAddr > baseAddr) loAddr = UINT64_MIN; // overflow
+
+ TADDR hiAddr = baseAddr + INT32_MAX;
+ if (hiAddr < baseAddr) hiAddr = UINT64_MAX; // overflow
+
+ PCODE jumpStubAddr = ExecutionManager::jumpStub(pMethod,
+ target,
+ (BYTE *)loAddr,
+ (BYTE *)hiAddr,
+ pLoaderAllocator);
+
+ offset = jumpStubAddr - baseAddr;
+
+ if (!FitsInI4(offset))
+ {
+ _ASSERTE(!"jump stub was not in expected range");
+ EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE);
+ }
+ }
+
+ _ASSERTE(FitsInI4(offset));
+ return static_cast<INT32>(offset);
+}
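+
+// Illustrative usage (a sketch, not part of this change): patching the rel32
+// field of a "jmp rel32" whose opcode byte sits at pJmp; pJmp (a BYTE*),
+// pTarget and pMD are placeholders for the caller's values.
+//
+//     INT32 UNALIGNED * pRel32 = (INT32 UNALIGNED *)(pJmp + 1);
+//     *pRel32 = rel32UsingJumpStub(pRel32, pTarget, pMD, NULL);
+//
+// If pTarget is not reachable with a 32-bit displacement, a jump stub within
+// range is allocated and the returned rel32 points at that stub instead.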
+
+BOOL DoesSlotCallPrestub(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ PRECONDITION(pCode != GetPreStubEntryPoint());
+ } CONTRACTL_END;
+
+ // AMD64 has the following possible sequences for prestub logic:
+ // 1. slot -> temporary entrypoint -> prestub
+ // 2. slot -> precode -> prestub
+ // 3. slot -> precode -> jumprel64 (jump stub) -> prestub
+ // 4. slot -> precode -> jumprel64 (NGEN case) -> prestub
+
+#ifdef HAS_COMPACT_ENTRYPOINTS
+ if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL)
+ {
+ return TRUE;
+ }
+#endif
+
+ if (!IS_ALIGNED(pCode, PRECODE_ALIGNMENT))
+ {
+ return FALSE;
+ }
+
+#ifdef HAS_FIXUP_PRECODE
+ if (*PTR_BYTE(pCode) == X86_INSTR_CALL_REL32)
+ {
+ // Note that call could have been patched to jmp in the meantime
+ pCode = rel32Decode(pCode+1);
+
+#ifdef FEATURE_PREJIT
+ // NGEN helper
+ if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) {
+ pCode = (TADDR)rel32Decode(pCode+1);
+ }
+#endif
+
+ // JumpStub
+ if (isJumpRel64(pCode)) {
+ pCode = decodeJump64(pCode);
+ }
+
+ return pCode == (TADDR)PrecodeFixupThunk;
+ }
+#endif
+
+ if (*PTR_USHORT(pCode) != X86_INSTR_MOV_R10_IMM64 || // mov r10,XXXX
+ *PTR_BYTE(pCode+10) != X86_INSTR_NOP || // nop
+ *PTR_BYTE(pCode+11) != X86_INSTR_JMP_REL32) // jmp rel32
+ {
+ return FALSE;
+ }
+ pCode = rel32Decode(pCode+12);
+
+#ifdef FEATURE_PREJIT
+ // NGEN helper
+ if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) {
+ pCode = (TADDR)rel32Decode(pCode+1);
+ }
+#endif
+
+ // JumpStub
+ if (isJumpRel64(pCode)) {
+ pCode = decodeJump64(pCode);
+ }
+
+ return pCode == GetPreStubEntryPoint();
+}
+
+//
+// Some AMD64 assembly functions have one or more DWORDs at the end of the function
+// that specify the offsets of significant instructions within the function.
+// We use this function to retrieve those offsets.
+//
+DWORD GetOffsetAtEndOfFunction(ULONGLONG uImageBase,
+ PT_RUNTIME_FUNCTION pFunctionEntry,
+ int offsetNum /* = 1*/)
+{
+ CONTRACTL
+ {
+ MODE_ANY;
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ PRECONDITION((offsetNum > 0) && (offsetNum < 20)); /* we only allow reasonable offsetNums 1..19 */
+ }
+ CONTRACTL_END;
+
+ DWORD functionSize = pFunctionEntry->EndAddress - pFunctionEntry->BeginAddress;
+ BYTE* pEndOfFunction = (BYTE*) (uImageBase + pFunctionEntry->EndAddress);
+ DWORD* pOffset = (DWORD*) (pEndOfFunction) - offsetNum;
+ DWORD offsetInFunc = *pOffset;
+
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/cGenAMD64.cpp", (offsetInFunc >= 0) && (offsetInFunc < functionSize));
+
+ return offsetInFunc;
+}
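+
+// Illustrative usage (a sketch, not part of this change): offsetNum counts
+// backwards from the end of the function, so for a routine that stores two
+// trailing DWORDs the caller would read them back as
+//
+//     DWORD offset1 = GetOffsetAtEndOfFunction(uImageBase, pFunctionEntry, 1); // last DWORD
+//     DWORD offset2 = GetOffsetAtEndOfFunction(uImageBase, pFunctionEntry, 2); // second-to-last DWORD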
+
+//==========================================================================================
+// In an NGen image, virtual slots inherited from cross-module dependencies point to jump thunks.
+// These jump thunks initially point to VirtualMethodFixupStub, which transfers control here.
+// This method, VirtualMethodFixupWorker, patches the jump thunk to point to the actual
+// inherited method body once the precode has executed and we have a stable entry point.
+//
+EXTERN_C PCODE VirtualMethodFixupWorker(TransitionBlock * pTransitionBlock, CORCOMPILE_VIRTUAL_IMPORT_THUNK * pThunk)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS; // GC not allowed until we call pEMFrame->SetFunction(pMD);
+
+ ENTRY_POINT;
+ }
+ CONTRACTL_END;
+
+ MAKE_CURRENT_THREAD_AVAILABLE();
+
+ PCODE pCode = NULL;
+ MethodDesc * pMD = NULL;
+
+#ifdef _DEBUG
+ Thread::ObjectRefFlush(CURRENT_THREAD);
+#endif
+
+ BEGIN_SO_INTOLERANT_CODE(CURRENT_THREAD);
+
+ _ASSERTE(IS_ALIGNED((size_t)pThunk, sizeof(INT64)));
+
+ FrameWithCookie<ExternalMethodFrame> frame(pTransitionBlock);
+ ExternalMethodFrame * pEMFrame = &frame;
+
+ OBJECTREF pThisPtr = pEMFrame->GetThis();
+ _ASSERTE(pThisPtr != NULL);
+ VALIDATEOBJECT(pThisPtr);
+
+ MethodTable * pMT = pThisPtr->GetTrueMethodTable();
+
+ WORD slotNumber = pThunk->slotNum;
+ _ASSERTE(slotNumber != (WORD)-1);
+
+ pCode = pMT->GetRestoredSlot(slotNumber);
+
+ if (!DoesSlotCallPrestub(pCode))
+ {
+ pMD = MethodTable::GetMethodDescForSlotAddress(pCode);
+
+ pEMFrame->SetFunction(pMD); // We will use the pMD to enumerate the GC refs in the arguments
+ pEMFrame->Push(CURRENT_THREAD);
+
+ INSTALL_MANAGED_EXCEPTION_DISPATCHER;
+ INSTALL_UNWIND_AND_CONTINUE_HANDLER_NO_PROBE;
+
+ // Skip fixup precode jump for better perf
+ PCODE pDirectTarget = Precode::TryToSkipFixupPrecode(pCode);
+ if (pDirectTarget != NULL)
+ pCode = pDirectTarget;
+
+ INT64 oldValue = *(INT64*)pThunk;
+ BYTE* pOldValue = (BYTE*)&oldValue;
+
+ if (pOldValue[0] == X86_INSTR_CALL_REL32)
+ {
+ INT64 newValue = oldValue;
+ BYTE* pNewValue = (BYTE*)&newValue;
+ pNewValue[0] = X86_INSTR_JMP_REL32;
+
+ *(INT32 *)(pNewValue+1) = rel32UsingJumpStub((INT32*)(&pThunk->callJmp[1]), pCode, pMD, NULL);
+
+ _ASSERTE(IS_ALIGNED(pThunk, sizeof(INT64)));
+ EnsureWritableExecutablePages(pThunk, sizeof(INT64));
+ FastInterlockCompareExchangeLong((INT64*)pThunk, newValue, oldValue);
+
+ FlushInstructionCache(GetCurrentProcess(), pThunk, 8);
+ }
+
+ UNINSTALL_UNWIND_AND_CONTINUE_HANDLER_NO_PROBE;
+ UNINSTALL_MANAGED_EXCEPTION_DISPATCHER;
+ pEMFrame->Pop(CURRENT_THREAD);
+ }
+
+ // Ready to return
+
+ END_SO_INTOLERANT_CODE;
+
+ return pCode;
+}
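+
+// Illustrative before/after for the thunk patch above (a sketch, not part of
+// this change; the exact CORCOMPILE_VIRTUAL_IMPORT_THUNK layout is defined
+// elsewhere). The first byte of the 8-byte slot flips from call to jmp:
+//
+//     before:  E8 <rel32>   call VirtualMethodFixupStub
+//     after:   E9 <rel32>   jmp  stable entry point (possibly via a jump stub)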
+
+#ifdef FEATURE_READYTORUN
+
+//
+// Allocation of dynamic helpers
+//
+
+#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)
+
+#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
+ SIZE_T cb = size; \
+ SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
+ BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
+ BYTE * p = pStart;
+
+#define END_DYNAMIC_HELPER_EMIT() \
+ _ASSERTE(pStart + cb == p); \
+ while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \
+ ClrFlushInstructionCache(pStart, cbAligned); \
+ return (PCODE)pStart
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ STANDARD_VM_CONTRACT;
+
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBF48; // mov rdi, XXXXXX
+#else
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
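+
+// Resulting 15-byte layout for the helper above (illustrative; Windows
+// encoding shown, the UNIX_AMD64_ABI variant loads rdi instead of rcx):
+//
+//     48 B9 <imm64>   mov rcx, arg
+//     E9 <rel32>      jmp target (or a jump stub within rel32 range)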
+
+void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ CONTRACTL
+ {
+ GC_NOTRIGGER;
+ PRECONDITION(p != NULL && target != NULL);
+ }
+ CONTRACTL_END;
+
+ // Move an argument into the second argument register and jump to a target function.
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBE48; // mov rsi, XXXXXX
+#else
+ *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+}
+
+PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+ EmitHelperWithArg(p, pAllocator, arg, target);
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(25);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBF48; // mov rdi, XXXXXX
+#else
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBE48; // mov rsi, XXXXXX
+#else
+ *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg2;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(18);
+
+#ifdef UNIX_AMD64_ABI
+ *p++ = 0x48; // mov rsi, rdi
+ *(UINT16 *)p = 0xF78B;
+#else
+ *p++ = 0x48; // mov rdx, rcx
+ *(UINT16 *)p = 0xD18B;
+#endif
+ p += 2;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBF48; // mov rdi, XXXXXX
+#else
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(1);
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(11);
+
+ *(UINT16 *)p = 0xB848; // mov rax, XXXXXX
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ? 15 : 11);
+
+ *(UINT16 *)p = 0xA148; // mov rax, [XXXXXX]
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ if (offset != 0)
+ {
+ // add rax, <offset>
+ *p++ = 0x48;
+ *p++ = 0x83;
+ *p++ = 0xC0;
+ *p++ = offset;
+ }
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX
+#else
+ *(UINT16 *)p = 0xB849; // mov r8, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(25);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX
+#else
+ *(UINT16 *)p = 0xB849; // mov r8, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#else
+ *(UINT16 *)p = 0xB949; // mov r9, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg2;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule)
+{
+ STANDARD_VM_CONTRACT;
+
+ PCODE helperAddress = (pLookup->helper == CORINFO_HELP_RUNTIMEHANDLE_METHOD ?
+ GetEEFuncEntryPoint(JIT_GenericHandleMethodWithSlotAndModule) :
+ GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule));
+
+ GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT);
+ pArgs->dictionaryIndexAndSlot = dictionaryIndexAndSlot;
+ pArgs->signature = pLookup->signature;
+ pArgs->module = (CORINFO_MODULE_HANDLE)pModule;
+
+ // The handle is available only via the run-time helper function
+ if (pLookup->indirections == CORINFO_USEHELPER)
+ {
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+ // rcx/rdi contains the generic context parameter
+ // mov rdx/rsi,pArgs
+ // jmp helperAddress
+ EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);
+
+ END_DYNAMIC_HELPER_EMIT();
+ }
+ else
+ {
+ int indirectionsSize = 0;
+ for (WORD i = 0; i < pLookup->indirections; i++)
+ indirectionsSize += (pLookup->offsets[i] >= 0x80 ? 7 : 4);
+
+ int codeSize = indirectionsSize + (pLookup->testForNull ? 30 : 4);
+
+ BEGIN_DYNAMIC_HELPER_EMIT(codeSize);
+
+ if (pLookup->testForNull)
+ {
+ // rcx/rdi contains the generic context parameter. Save a copy of it in the rax register
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00f88948; p += 3; // mov rax,rdi
+#else
+ *(UINT32*)p = 0x00c88948; p += 3; // mov rax,rcx
+#endif
+ }
+
+ for (WORD i = 0; i < pLookup->indirections; i++)
+ {
+#ifdef UNIX_AMD64_ABI
+ // mov rdi,qword ptr [rdi+offset]
+ if (pLookup->offsets[i] >= 0x80)
+ {
+ *(UINT32*)p = 0x00bf8b48; p += 3;
+ *(UINT32*)p = (UINT32)pLookup->offsets[i]; p += 4;
+ }
+ else
+ {
+ *(UINT32*)p = 0x007f8b48; p += 3;
+ *p++ = (BYTE)pLookup->offsets[i];
+ }
+#else
+ // mov rcx,qword ptr [rcx+offset]
+ if (pLookup->offsets[i] >= 0x80)
+ {
+ *(UINT32*)p = 0x00898b48; p += 3;
+ *(UINT32*)p = (UINT32)pLookup->offsets[i]; p += 4;
+ }
+ else
+ {
+ *(UINT32*)p = 0x00498b48; p += 3;
+ *p++ = (BYTE)pLookup->offsets[i];
+ }
+#endif
+ }
+
+ // No null test required
+ if (!pLookup->testForNull)
+ {
+ // No fixups needed for R2R
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00f88948; p += 3; // mov rax,rdi
+#else
+ *(UINT32*)p = 0x00c88948; p += 3; // mov rax,rcx
+#endif
+ *p++ = 0xC3; // ret
+ }
+ else
+ {
+ // rcx/rdi contains the value of the dictionary slot entry
+
+ _ASSERTE(pLookup->indirections != 0);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00ff8548; p += 3; // test rdi,rdi
+#else
+ *(UINT32*)p = 0x00c98548; p += 3; // test rcx,rcx
+#endif
+
+ // je 'HELPER_CALL' (a jump of 4 bytes)
+ *(UINT16*)p = 0x0474; p += 2;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00f88948; p += 3; // mov rax,rdi
+#else
+ *(UINT32*)p = 0x00c88948; p += 3; // mov rax,rcx
+#endif
+ *p++ = 0xC3; // ret
+
+ // 'HELPER_CALL'
+ {
+ // Put the generic context back into rcx/rdi (it was saved in rax above)
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00c78948; p += 3; // mov rdi,rax
+#else
+ *(UINT32*)p = 0x00c18948; p += 3; // mov rcx,rax
+#endif
+
+ // mov rdx,pArgs
+ // jmp helperAddress
+ EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);
+ }
+ }
+
+ END_DYNAMIC_HELPER_EMIT();
+ }
+}
+
+#endif // FEATURE_READYTORUN
+
+#endif // DACCESS_COMPILE