Diffstat (limited to 'src/vm/amd64/cgenamd64.cpp')
-rw-r--r-- | src/vm/amd64/cgenamd64.cpp | 1278
1 file changed, 1278 insertions, 0 deletions
diff --git a/src/vm/amd64/cgenamd64.cpp b/src/vm/amd64/cgenamd64.cpp new file mode 100644 index 0000000000..51aac1ebc6 --- /dev/null +++ b/src/vm/amd64/cgenamd64.cpp @@ -0,0 +1,1278 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. +// +// Various helper routines for generating AMD64 assembly code. +// + +// Precompiled Header + +#include "common.h" + +#include "stublink.h" +#include "cgensys.h" +#include "siginfo.hpp" +#include "excep.h" +#include "ecall.h" +#include "dllimport.h" +#include "dllimportcallback.h" +#include "dbginterface.h" +#include "fcall.h" +#include "array.h" +#include "virtualcallstub.h" +#include "jitinterface.h" + +#ifdef FEATURE_COMINTEROP +#include "clrtocomcall.h" +#endif // FEATURE_COMINTEROP + +void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pRegs) +{ + LIMITED_METHOD_CONTRACT; + + T_CONTEXT * pContext = pRD->pCurrentContext; +#define CALLEE_SAVED_REGISTER(regname) pContext->regname = pRegs->regname; + ENUM_CALLEE_SAVED_REGISTERS(); +#undef CALLEE_SAVED_REGISTER + + KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers; +#define CALLEE_SAVED_REGISTER(regname) pContextPointers->regname = (PULONG64)&pRegs->regname; + ENUM_CALLEE_SAVED_REGISTERS(); +#undef CALLEE_SAVED_REGISTER +} + +void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD) +{ + LIMITED_METHOD_CONTRACT; + + KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers; + pContextPointers->Rax = NULL; +#ifdef UNIX_AMD64_ABI + pContextPointers->Rsi = NULL; + pContextPointers->Rdi = NULL; +#endif + pContextPointers->Rcx = NULL; + pContextPointers->Rdx = NULL; + pContextPointers->R8 = NULL; + pContextPointers->R9 = NULL; + pContextPointers->R10 = NULL; + pContextPointers->R11 = NULL; +} + +void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + LIMITED_METHOD_CONTRACT; + + pRD->IsCallerContextValid = FALSE; + pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. + + pRD->pCurrentContext->Rip = GetReturnAddress(); + pRD->pCurrentContext->Rsp = GetSP(); + + UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters()); + ClearRegDisplayArgumentAndScratchRegisters(pRD); + + SyncRegDisplayToCurrentContext(pRD); + + LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TransitionFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP)); +} + +#ifndef DACCESS_COMPILE + +extern "C" TADDR s_pStubHelperFrameVPtr; +TADDR s_pStubHelperFrameVPtr = StubHelperFrame::GetMethodFrameVPtr(); + +void TailCallFrame::InitFromContext(T_CONTEXT * pContext) +{ + WRAPPER_NO_CONTRACT; + +#define CALLEE_SAVED_REGISTER(regname) m_calleeSavedRegisters.regname = pContext->regname; + ENUM_CALLEE_SAVED_REGISTERS(); +#undef CALLEE_SAVED_REGISTER + + m_pGCLayout = 0; + m_ReturnAddress = pContext->Rip; +} + +#endif // !DACCESS_COMPILE + +void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + LIMITED_METHOD_CONTRACT; + + pRD->IsCallerContextValid = FALSE; + pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. 
+
+ pRD->pCurrentContext->Rip = m_ReturnAddress;
+ pRD->pCurrentContext->Rsp = dac_cast<TADDR>(this) + sizeof(*this);
+
+ UpdateRegDisplayFromCalleeSavedRegisters(pRD, &m_calleeSavedRegisters);
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK TailCallFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+#ifdef PROFILING_SUPPORTED
+ PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
+#endif
+ HOST_NOCALLS;
+ MODE_ANY;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ if (!InlinedCallFrame::FrameHasActiveCall(this))
+ {
+ LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this));
+ return;
+ }
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ pRD->pCurrentContext->Rip = *(DWORD64 *)&m_pCallerReturnAddress;
+ pRD->pCurrentContext->Rsp = *(DWORD64 *)&m_pCallSiteSP;
+ pRD->pCurrentContext->Rbp = *(DWORD64 *)&m_pCalleeSavedFP;
+
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = NULL;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ pRD->pCurrentContextPointers->Rbp = (DWORD64 *)&m_pCalleeSavedFP;
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK InlinedCallFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ PRECONDITION(m_MachState._pRetAddr == PTR_TADDR(&m_MachState.m_Rip));
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ //
+ // Copy the saved state from the frame to the current context.
+ //
+
+ LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState.m_Rip, m_MachState.m_Rsp));
+
+#if defined(DACCESS_COMPILE)
+ // For DAC, we may get here when the HMF is still uninitialized.
+ // So we may need to unwind here.
+ if (!m_MachState.isValid())
+ {
+ // This allocation throws on OOM.
+ MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true);
+
+ InsureInit(false, pUnwoundState);
+
+ pRD->pCurrentContext->Rip = pRD->ControlPC = pUnwoundState->m_Rip;
+ pRD->pCurrentContext->Rsp = pRD->SP = pUnwoundState->m_Rsp;
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = pUnwoundState->m_Capture.regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = pUnwoundState->m_Ptrs.p##regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+
+ return;
+ }
+#endif // DACCESS_COMPILE
+
+ pRD->pCurrentContext->Rip = pRD->ControlPC = m_MachState.m_Rip;
+ pRD->pCurrentContext->Rsp = pRD->SP = m_MachState.m_Rsp;
+
+#ifdef FEATURE_PAL
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = (m_MachState.m_Ptrs.p##regname != NULL) ? \
+ *m_MachState.m_Ptrs.p##regname : m_MachState.m_Unwound.regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#else // FEATURE_PAL
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContext->regname = *m_MachState.m_Ptrs.p##regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+#endif // FEATURE_PAL
+
+#define CALLEE_SAVED_REGISTER(regname) pRD->pCurrentContextPointers->regname = m_MachState.m_Ptrs.p##regname;
+ ENUM_CALLEE_SAVED_REGISTERS();
+#undef CALLEE_SAVED_REGISTER
+
+ //
+ // Clear all knowledge of scratch registers. We're skipping to an
+ // arbitrary point on the stack, and frames aren't required to preserve or
+ // keep track of these anyway.
+ //
+
+ ClearRegDisplayArgumentAndScratchRegisters(pRD);
+}
+
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ memcpy(pRD->pCurrentContext, &m_ctx, sizeof(CONTEXT));
+
+ pRD->ControlPC = m_ctx.Rip;
+
+ pRD->SP = m_ctx.Rsp;
+
+ pRD->pCurrentContextPointers->Rax = &m_ctx.Rax;
+ pRD->pCurrentContextPointers->Rcx = &m_ctx.Rcx;
+ pRD->pCurrentContextPointers->Rdx = &m_ctx.Rdx;
+ pRD->pCurrentContextPointers->Rbx = &m_ctx.Rbx;
+ pRD->pCurrentContextPointers->Rbp = &m_ctx.Rbp;
+ pRD->pCurrentContextPointers->Rsi = &m_ctx.Rsi;
+ pRD->pCurrentContextPointers->Rdi = &m_ctx.Rdi;
+ pRD->pCurrentContextPointers->R8 = &m_ctx.R8;
+ pRD->pCurrentContextPointers->R9 = &m_ctx.R9;
+ pRD->pCurrentContextPointers->R10 = &m_ctx.R10;
+ pRD->pCurrentContextPointers->R11 = &m_ctx.R11;
+ pRD->pCurrentContextPointers->R12 = &m_ctx.R12;
+ pRD->pCurrentContextPointers->R13 = &m_ctx.R13;
+ pRD->pCurrentContextPointers->R14 = &m_ctx.R14;
+ pRD->pCurrentContextPointers->R15 = &m_ctx.R15;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+}
+
+#ifdef FEATURE_HIJACK
+TADDR ResumableFrame::GetReturnAddressPtr()
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+ return dac_cast<TADDR>(m_Regs) + offsetof(CONTEXT, Rip);
+}
+
+void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACT_VOID
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ SUPPORTS_DAC;
+ }
+ CONTRACT_END;
+
+ CopyMemory(pRD->pCurrentContext, m_Regs, sizeof(CONTEXT));
+
+ pRD->ControlPC = m_Regs->Rip;
+
+ pRD->SP = m_Regs->Rsp;
+
+ pRD->pCurrentContextPointers->Rax = &m_Regs->Rax;
+ pRD->pCurrentContextPointers->Rcx = &m_Regs->Rcx;
+ pRD->pCurrentContextPointers->Rdx = &m_Regs->Rdx;
+ pRD->pCurrentContextPointers->Rbx = &m_Regs->Rbx;
+ pRD->pCurrentContextPointers->Rbp = &m_Regs->Rbp;
+ pRD->pCurrentContextPointers->Rsi = &m_Regs->Rsi;
+ pRD->pCurrentContextPointers->Rdi = &m_Regs->Rdi;
+ pRD->pCurrentContextPointers->R8 = &m_Regs->R8;
+ pRD->pCurrentContextPointers->R9 = &m_Regs->R9;
+ pRD->pCurrentContextPointers->R10 = &m_Regs->R10;
+ pRD->pCurrentContextPointers->R11 = &m_Regs->R11;
+ pRD->pCurrentContextPointers->R12 = &m_Regs->R12;
+ pRD->pCurrentContextPointers->R13 = &m_Regs->R13;
+ pRD->pCurrentContextPointers->R14 = &m_Regs->R14;
+ pRD->pCurrentContextPointers->R15 = &m_Regs->R15;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ RETURN;
+}
+
+// The HijackFrame has to know the registers that are pushed by OnHijackTripThread
+void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ pRD->IsCallerContextValid = FALSE;
+ pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+ pRD->pCurrentContext->Rip = m_ReturnAddress;
+ pRD->pCurrentContext->Rsp = PTR_TO_MEMBER_TADDR(HijackArgs, m_Args, Rip) + sizeof(void *);
+
+ UpdateRegDisplayFromCalleeSavedRegisters(pRD, &(m_Args->Regs));
+
+#ifdef UNIX_AMD64_ABI
+ pRD->pCurrentContextPointers->Rsi = NULL;
+ pRD->pCurrentContextPointers->Rdi = NULL;
+#endif
+ pRD->pCurrentContextPointers->Rcx = NULL;
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->Rdx = (PULONG64)&m_Args->Rdx;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->Rdx = NULL;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ pRD->pCurrentContextPointers->R8 = NULL;
+ pRD->pCurrentContextPointers->R9 = NULL;
+ pRD->pCurrentContextPointers->R10 = NULL;
+ pRD->pCurrentContextPointers->R11 = NULL;
+
+ pRD->pCurrentContextPointers->Rax = (PULONG64)&m_Args->Rax;
+
+ SyncRegDisplayToCurrentContext(pRD);
+
+/*
+ // This only describes the top-most frame
+ pRD->pContext = NULL;
+
+
+ pRD->PCTAddr = dac_cast<TADDR>(m_Args) + offsetof(HijackArgs, Rip);
+ //pRD->pPC = PTR_SLOT(pRD->PCTAddr);
+ pRD->SP = (ULONG64)(pRD->PCTAddr + sizeof(TADDR));
+*/
+}
+#endif // FEATURE_HIJACK
+
+BOOL isJumpRel32(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ } CONTRACTL_END;
+
+ PTR_BYTE pbCode = PTR_BYTE(pCode);
+
+ return 0xE9 == pbCode[0];
+}
+
+//
+// Given a pBuffer that holds a jmp rel32 instruction, this
+// method decodes the instruction and returns the jump target
+//
+PCODE decodeJump32(PCODE pBuffer)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ // jmp rel32
+ _ASSERTE(isJumpRel32(pBuffer));
+
+ return rel32Decode(pBuffer+1);
+}
+
+BOOL isJumpRel64(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ } CONTRACTL_END;
+
+ PTR_BYTE pbCode = PTR_BYTE(pCode);
+
+ return 0x48 == pbCode[0] &&
+ 0xB8 == pbCode[1] &&
+ 0xFF == pbCode[10] &&
+ 0xE0 == pbCode[11];
+}
+
+PCODE decodeJump64(PCODE pBuffer)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ SUPPORTS_DAC;
+ }
+ CONTRACTL_END;
+
+ // mov rax, xxx
+ // jmp rax
+ _ASSERTE(isJumpRel64(pBuffer));
+
+ return *PTR_UINT64(pBuffer+2);
+}
+
+#ifdef DACCESS_COMPILE
+BOOL GetAnyThunkTarget (CONTEXT *pctx, TADDR *pTarget, TADDR *pTargetMethodDesc)
+{
+ TADDR pThunk = GetIP(pctx);
+
+ *pTargetMethodDesc = NULL;
+
+ //
+ // Check for something generated by emitJump.
+ //
+ if (isJumpRel64(pThunk))
+ {
+ *pTarget = decodeJump64(pThunk);
+ return TRUE;
+ }
+
+ return FALSE;
+}
+#endif // DACCESS_COMPILE
+
+
+#ifndef DACCESS_COMPILE
+
+// Note: This is only used on server GC on Windows.
+//
+// This function returns the number of logical processors on a given physical chip. If it cannot
+// determine the number of logical CPUs, or the machine is not populated uniformly with the same
+// type of processors, this function returns 1.
+
+extern "C" DWORD __stdcall getcpuid(DWORD arg, unsigned char result[16]);
+
+// fix this if/when AMD does multicore or SMT
+DWORD GetLogicalCpuCount()
+{
+ // No CONTRACT possible because GetLogicalCpuCount uses SEH
+
+ STATIC_CONTRACT_THROWS;
+ STATIC_CONTRACT_GC_NOTRIGGER;
+
+ static DWORD val = 0;
+
+ // return the cached value if we have already computed it
+ if (val)
+ {
+ return val;
+ }
+
+ struct Param : DefaultCatchFilterParam
+ {
+ DWORD retVal;
+ } param;
+ param.pv = COMPLUS_EXCEPTION_EXECUTE_HANDLER;
+ param.retVal = 1;
+
+ PAL_TRY(Param *, pParam, &param)
+ {
+
+ unsigned char buffer[16];
+ DWORD maxCpuId = getcpuid(0, buffer);
+ DWORD* dwBuffer = (DWORD*)buffer;
+
+ if (maxCpuId < 1)
+ goto qExit;
+
+ if (dwBuffer[1] == 'uneG') {
+ if (dwBuffer[3] == 'Ieni') {
+ if (dwBuffer[2] == 'letn') { // get SMT/multicore enumeration for Intel EM64T
+
+
+ // TODO: Currently GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() are broken on
+ // multi-core processors, but we never call into those two functions since we don't halve the
+ // gen0 size on Prescott-and-above processors. We keep the old version here for earlier-
+ // generation (Northwood-based) systems: perf data suggests that on those systems halving the
+ // gen0 size still boosts performance (e.g., BizTalk improves by about 17%), so on those
+ // earlier systems we still go ahead and halve the gen0 size. The logic in
+ // GetLogicalCpuCountFromOS() and GetLogicalCpuCountFallback() works fine for those
+ // earlier-generation systems. On Prescott-and-above or multi-core processors, perf data
+ // suggests that not halving the gen0 size at all gives better overall performance.
+ // This is going to be fixed with a new version in the Orcas time frame.
+
+ if( (maxCpuId > 3) && (maxCpuId < 0x80000000) )
+ goto qExit;
+
+ val = GetLogicalCpuCountFromOS(); //try to obtain HT enumeration from OS API
+ if (val )
+ {
+ pParam->retVal = val; // OS API HT enumeration successful, we are done
+ goto qExit;
+ }
+
+ val = GetLogicalCpuCountFallback(); // Fallback to HT enumeration using CPUID
+ if( val )
+ pParam->retVal = val;
+ }
+ }
+ }
+qExit: ;
+ }
+
+ PAL_EXCEPT_FILTER(DefaultCatchFilter)
+ {
+ }
+ PAL_ENDTRY
+
+ if (val == 0)
+ {
+ val = param.retVal;
+ }
+
+ return param.retVal;
+}
+
+void EncodeLoadAndJumpThunk (LPBYTE pBuffer, LPVOID pv, LPVOID pTarget)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ MODE_ANY;
+
+ PRECONDITION(CheckPointer(pBuffer));
+ }
+ CONTRACTL_END;
+
+ // mov r10, pv 49 ba xx xx xx xx xx xx xx xx
+
+ pBuffer[0] = 0x49;
+ pBuffer[1] = 0xBA;
+
+ *((UINT64 UNALIGNED *)&pBuffer[2]) = (UINT64)pv;
+
+ // mov rax, pTarget 48 b8 xx xx xx xx xx xx xx xx
+
+ pBuffer[10] = 0x48;
+ pBuffer[11] = 0xB8;
+
+ *((UINT64 UNALIGNED *)&pBuffer[12]) = (UINT64)pTarget;
+
+ // jmp rax ff e0
+
+ pBuffer[20] = 0xFF;
+ pBuffer[21] = 0xE0;
+
+ _ASSERTE(DbgIsExecutable(pBuffer, 22));
+}
+
+void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
+{
+ CONTRACT_VOID
+ {
+ THROWS;
+ GC_NOTRIGGER;
+ MODE_ANY;
+ }
+ CONTRACT_END;
+
+ BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE;
+
+ // We need the target to be in a 64-bit aligned memory location and the call instruction
+ // to immediately precede the ComCallMethodDesc. We'll generate an indirect call to avoid
+ // consuming 3 qwords for this (mov rax, | target | nops & call rax).
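+ // (Note on the rel32 written below: an x86-64 "call [rip+disp32]" measures its
+ // displacement from the end of the 6-byte call instruction, so the constant
+ // COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET - COMMETHOD_CALL_PRESTUB_SIZE walks back
+ // from the byte just past the call to the qword slot holding the target, assuming
+ // the call is the final instruction of the prestub, as the layout below shows.)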
+ + // dq 123456789abcdef0h + // nop 90 + // nop 90 + // call [$ - 10] ff 15 f0 ff ff ff + + *((UINT64 *)&pBuffer[COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET]) = (UINT64)target; + + pBuffer[-2] = 0x90; + pBuffer[-1] = 0x90; + + pBuffer[0] = 0xFF; + pBuffer[1] = 0x15; + *((UINT32 UNALIGNED *)&pBuffer[2]) = (UINT32)(COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET - COMMETHOD_CALL_PRESTUB_SIZE); + + _ASSERTE(DbgIsExecutable(pBuffer, COMMETHOD_CALL_PRESTUB_SIZE)); + + RETURN; +} + +void emitJump(LPBYTE pBuffer, LPVOID target) +{ + CONTRACTL + { + THROWS; + GC_NOTRIGGER; + MODE_ANY; + + PRECONDITION(CheckPointer(pBuffer)); + } + CONTRACTL_END; + + // mov rax, 123456789abcdef0h 48 b8 xx xx xx xx xx xx xx xx + // jmp rax ff e0 + + pBuffer[0] = 0x48; + pBuffer[1] = 0xB8; + + *((UINT64 UNALIGNED *)&pBuffer[2]) = (UINT64)target; + + pBuffer[10] = 0xFF; + pBuffer[11] = 0xE0; + + _ASSERTE(DbgIsExecutable(pBuffer, 12)); +} + +void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam) +{ + CONTRACTL + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + } + CONTRACTL_END; + + // padding // CC CC CC CC + // mov r10, pUMEntryThunk // 49 ba xx xx xx xx xx xx xx xx // METHODDESC_REGISTER + // mov rax, pJmpDest // 48 b8 xx xx xx xx xx xx xx xx // need to ensure this imm64 is qword aligned + // TAILJMP_RAX // 48 FF E0 + +#ifdef _DEBUG + m_padding[0] = X86_INSTR_INT3; + m_padding[1] = X86_INSTR_INT3; + m_padding[2] = X86_INSTR_INT3; + m_padding[3] = X86_INSTR_INT3; +#endif // _DEBUG + m_movR10[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT | REX_OPCODE_REG_EXT; + m_movR10[1] = 0xBA; + m_uet = pvSecretParam; + m_movRAX[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + m_movRAX[1] = 0xB8; + m_execstub = pTargetCode; + m_jmpRAX[0] = REX_PREFIX_BASE | REX_OPERAND_SIZE_64BIT; + m_jmpRAX[1] = 0xFF; + m_jmpRAX[2] = 0xE0; + + _ASSERTE(DbgIsExecutable(&m_movR10[0], &m_jmpRAX[3]-&m_movR10[0])); +} + +UMEntryThunk* UMEntryThunk::Decode(LPVOID pCallback) +{ + LIMITED_METHOD_CONTRACT; + + UMEntryThunkCode *pThunkCode = (UMEntryThunkCode*)((BYTE*)pCallback - UMEntryThunkCode::GetEntryPointOffset()); + + return (UMEntryThunk*)pThunkCode->m_uet; +} + +INT32 rel32UsingJumpStub(INT32 UNALIGNED * pRel32, PCODE target, MethodDesc *pMethod, LoaderAllocator *pLoaderAllocator /* = NULL */) +{ + CONTRACTL + { + THROWS; // Creating a JumpStub could throw OutOfMemory + GC_NOTRIGGER; + + PRECONDITION(pMethod != NULL || pLoaderAllocator != NULL); + // If a loader allocator isn't explicitly provided, we must be able to get one via the MethodDesc. + PRECONDITION(pLoaderAllocator != NULL || pMethod->GetLoaderAllocator() != NULL); + // If a domain is provided, the MethodDesc mustn't yet be set up to have one, or it must match the MethodDesc's domain, + // unless we're in a compilation domain (NGen loads assemblies as domain-bound but compiles them as domain neutral). 
PRECONDITION(!pLoaderAllocator || !pMethod || pMethod->GetMethodDescChunk()->GetMethodTablePtr()->IsNull() ||
+ pLoaderAllocator == pMethod->GetMethodDescChunk()->GetFirstMethodDesc()->GetLoaderAllocatorForCode() || IsCompilationProcess());
+ }
+ CONTRACTL_END;
+
+ TADDR baseAddr = (TADDR)pRel32 + 4;
+
+ INT_PTR offset = target - baseAddr;
+
+ if (!FitsInI4(offset) INDEBUG(|| PEDecoder::GetForceRelocs()))
+ {
+ TADDR loAddr = baseAddr + INT32_MIN;
+ if (loAddr > baseAddr) loAddr = UINT64_MIN; // overflow
+
+ TADDR hiAddr = baseAddr + INT32_MAX;
+ if (hiAddr < baseAddr) hiAddr = UINT64_MAX; // overflow
+
+ PCODE jumpStubAddr = ExecutionManager::jumpStub(pMethod,
+ target,
+ (BYTE *)loAddr,
+ (BYTE *)hiAddr,
+ pLoaderAllocator);
+
+ offset = jumpStubAddr - baseAddr;
+
+ if (!FitsInI4(offset))
+ {
+ _ASSERTE(!"jump stub was not in expected range");
+ EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE);
+ }
+ }
+
+ _ASSERTE(FitsInI4(offset));
+ return static_cast<INT32>(offset);
+}
+
+BOOL DoesSlotCallPrestub(PCODE pCode)
+{
+ CONTRACTL {
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ PRECONDITION(pCode != GetPreStubEntryPoint());
+ } CONTRACTL_END;
+
+ // AMD64 has the following possible sequences for prestub logic:
+ // 1. slot -> temporary entrypoint -> prestub
+ // 2. slot -> precode -> prestub
+ // 3. slot -> precode -> jumprel64 (jump stub) -> prestub
+ // 4. slot -> precode -> jumprel64 (NGEN case) -> prestub
+
+#ifdef HAS_COMPACT_ENTRYPOINTS
+ if (MethodDescChunk::GetMethodDescFromCompactEntryPoint(pCode, TRUE) != NULL)
+ {
+ return TRUE;
+ }
+#endif
+
+ if (!IS_ALIGNED(pCode, PRECODE_ALIGNMENT))
+ {
+ return FALSE;
+ }
+
+#ifdef HAS_FIXUP_PRECODE
+ if (*PTR_BYTE(pCode) == X86_INSTR_CALL_REL32)
+ {
+ // Note that call could have been patched to jmp in the meantime
+ pCode = rel32Decode(pCode+1);
+
+#ifdef FEATURE_PREJIT
+ // NGEN helper
+ if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) {
+ pCode = (TADDR)rel32Decode(pCode+1);
+ }
+#endif
+
+ // JumpStub
+ if (isJumpRel64(pCode)) {
+ pCode = decodeJump64(pCode);
+ }
+
+ return pCode == (TADDR)PrecodeFixupThunk;
+ }
+#endif
+
+ if (*PTR_USHORT(pCode) != X86_INSTR_MOV_R10_IMM64 || // mov r10,XXXX
+ *PTR_BYTE(pCode+10) != X86_INSTR_NOP || // nop
+ *PTR_BYTE(pCode+11) != X86_INSTR_JMP_REL32) // jmp rel32
+ {
+ return FALSE;
+ }
+ pCode = rel32Decode(pCode+12);
+
+#ifdef FEATURE_PREJIT
+ // NGEN helper
+ if (*PTR_BYTE(pCode) == X86_INSTR_JMP_REL32) {
+ pCode = (TADDR)rel32Decode(pCode+1);
+ }
+#endif
+
+ // JumpStub
+ if (isJumpRel64(pCode)) {
+ pCode = decodeJump64(pCode);
+ }
+
+ return pCode == GetPreStubEntryPoint();
+}
+
+//
+// Some AMD64 assembly functions have one or more DWORDs at the end of the function
+// that specify the offsets where significant instructions are;
+// we use this function to get at those offsets.
+//
+DWORD GetOffsetAtEndOfFunction(ULONGLONG uImageBase,
+ PT_RUNTIME_FUNCTION pFunctionEntry,
+ int offsetNum /* = 1*/)
+{
+ CONTRACTL
+ {
+ MODE_ANY;
+ NOTHROW;
+ GC_NOTRIGGER;
+ SO_TOLERANT;
+ PRECONDITION((offsetNum > 0) && (offsetNum < 20)); /* we only allow reasonable offsetNums 1..19 */
+ }
+ CONTRACTL_END;
+
+ DWORD functionSize = pFunctionEntry->EndAddress - pFunctionEntry->BeginAddress;
+ BYTE* pEndOfFunction = (BYTE*) (uImageBase + pFunctionEntry->EndAddress);
+ DWORD* pOffset = (DWORD*) (pEndOfFunction) - offsetNum;
+ DWORD offsetInFunc = *pOffset;
+
+ _ASSERTE_ALL_BUILDS("clr/src/VM/AMD64/cGenAMD64.cpp", (offsetInFunc >= 0) && (offsetInFunc < functionSize));
+
+ return offsetInFunc;
+}
+
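For illustration, a minimal standalone sketch of the trailing-DWORD convention that GetOffsetAtEndOfFunction reads. The FunctionEntry struct and the helper name are hypothetical stand-ins for PT_RUNTIME_FUNCTION and the routine above, assuming BeginAddress/EndAddress are RVAs relative to the image base, as in Windows x64 unwind data:

    #include <cassert>
    #include <cstdint>

    // Hypothetical stand-in for the PT_RUNTIME_FUNCTION fields used above;
    // both addresses are RVAs relative to the image base.
    struct FunctionEntry
    {
        uint32_t BeginAddress;
        uint32_t EndAddress;
    };

    // Read the Nth DWORD counting back from the end of the function body; by the
    // convention described above it holds an offset back into the function.
    uint32_t OffsetAtEndOfFunction(uint64_t imageBase, const FunctionEntry& fn, int offsetNum = 1)
    {
        assert(offsetNum > 0 && offsetNum < 20);
        const uint8_t* pEndOfFunction = reinterpret_cast<const uint8_t*>(imageBase + fn.EndAddress);
        uint32_t offsetInFunc = *(reinterpret_cast<const uint32_t*>(pEndOfFunction) - offsetNum);
        assert(offsetInFunc < fn.EndAddress - fn.BeginAddress); // must land inside the function
        return offsetInFunc;
    }
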
+//==========================================================================================
+// In an NGen image, virtual slots inherited from cross-module dependencies point to jump thunks.
+// These jump thunks initially point to VirtualMethodFixupStub, which transfers control here.
+// This method 'VirtualMethodFixupWorker' will patch the jump thunk to point to the actual
+// inherited method body once the precode has been executed and a stable entry point exists.
+//
+EXTERN_C PCODE VirtualMethodFixupWorker(TransitionBlock * pTransitionBlock, CORCOMPILE_VIRTUAL_IMPORT_THUNK * pThunk)
+{
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS; // GC not allowed until we call pEMFrame->SetFunction(pMD);
+
+ ENTRY_POINT;
+ }
+ CONTRACTL_END;
+
+ MAKE_CURRENT_THREAD_AVAILABLE();
+
+ PCODE pCode = NULL;
+ MethodDesc * pMD = NULL;
+
+#ifdef _DEBUG
+ Thread::ObjectRefFlush(CURRENT_THREAD);
+#endif
+
+ BEGIN_SO_INTOLERANT_CODE(CURRENT_THREAD);
+
+ _ASSERTE(IS_ALIGNED((size_t)pThunk, sizeof(INT64)));
+
+ FrameWithCookie<ExternalMethodFrame> frame(pTransitionBlock);
+ ExternalMethodFrame * pEMFrame = &frame;
+
+ OBJECTREF pThisPtr = pEMFrame->GetThis();
+ _ASSERTE(pThisPtr != NULL);
+ VALIDATEOBJECT(pThisPtr);
+
+ MethodTable * pMT = pThisPtr->GetTrueMethodTable();
+
+ WORD slotNumber = pThunk->slotNum;
+ _ASSERTE(slotNumber != (WORD)-1);
+
+ pCode = pMT->GetRestoredSlot(slotNumber);
+
+ if (!DoesSlotCallPrestub(pCode))
+ {
+ pMD = MethodTable::GetMethodDescForSlotAddress(pCode);
+
+ pEMFrame->SetFunction(pMD); // We will use the pMD to enumerate the GC refs in the arguments
+ pEMFrame->Push(CURRENT_THREAD);
+
+ INSTALL_MANAGED_EXCEPTION_DISPATCHER;
+ INSTALL_UNWIND_AND_CONTINUE_HANDLER_NO_PROBE;
+
+ // Skip fixup precode jump for better perf
+ PCODE pDirectTarget = Precode::TryToSkipFixupPrecode(pCode);
+ if (pDirectTarget != NULL)
+ pCode = pDirectTarget;
+
+ INT64 oldValue = *(INT64*)pThunk;
+ BYTE* pOldValue = (BYTE*)&oldValue;
+
+ if (pOldValue[0] == X86_INSTR_CALL_REL32)
+ {
+ INT64 newValue = oldValue;
+ BYTE* pNewValue = (BYTE*)&newValue;
+ pNewValue[0] = X86_INSTR_JMP_REL32;
+
+ *(INT32 *)(pNewValue+1) = rel32UsingJumpStub((INT32*)(&pThunk->callJmp[1]), pCode, pMD, NULL);
+
+ _ASSERTE(IS_ALIGNED(pThunk, sizeof(INT64)));
+ EnsureWritableExecutablePages(pThunk, sizeof(INT64));
+ FastInterlockCompareExchangeLong((INT64*)pThunk, newValue, oldValue);
+
+ FlushInstructionCache(GetCurrentProcess(), pThunk, 8);
+ }
+
+ UNINSTALL_UNWIND_AND_CONTINUE_HANDLER_NO_PROBE;
+ UNINSTALL_MANAGED_EXCEPTION_DISPATCHER;
+ pEMFrame->Pop(CURRENT_THREAD);
+ }
+
+ // Ready to return
+
+ END_SO_INTOLERANT_CODE;
+
+ return pCode;
+}
+
+#ifdef FEATURE_READYTORUN
+
+//
+// Allocation of dynamic helpers
+//
+
+#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)
+
+#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
+ SIZE_T cb = size; \
+ SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
+ BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
+ BYTE * p = pStart;
+
+#define END_DYNAMIC_HELPER_EMIT() \
+ _ASSERTE(pStart + cb == p); \
+ while (p < pStart + cbAligned) *p++ = X86_INSTR_INT3; \
+ ClrFlushInstructionCache(pStart, cbAligned); \
+ return (PCODE)pStart
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ STANDARD_VM_CONTRACT;
+
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBF48; // mov rdi, XXXXXX
+#else
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+void DynamicHelpers::EmitHelperWithArg(BYTE*& p, LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ CONTRACTL
+ {
+ GC_NOTRIGGER;
+ PRECONDITION(p != NULL && target != NULL);
+ }
+ CONTRACTL_END;
+
+ // Move an argument into the second argument register and jump to a target function.
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBE48; // mov rsi, XXXXXX
+#else
+ *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+}
+
+PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(15);
+
+ EmitHelperWithArg(p, pAllocator, arg, target);
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(25);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBF48; // mov rdi, XXXXXX
+#else
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBE48; // mov rsi, XXXXXX
+#else
+ *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg2;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(18);
+
+#ifdef UNIX_AMD64_ABI
+ *p++ = 0x48; // mov rsi, rdi
+ *(UINT16 *)p = 0xF78B;
+#else
+ *p++ = 0x48; // mov rdx, rcx
+ *(UINT16 *)p = 0xD18B;
+#endif
+ p += 2;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT16 *)p = 0xBF48; // mov rdi, XXXXXX
+#else
+ *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX
+#endif
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = X86_INSTR_JMP_REL32; // jmp rel32
+ *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator);
+ p += 4;
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(1);
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT(11);
+
+ *(UINT16 *)p = 0xB848; // mov rax, XXXXXX
+ p += 2;
+ *(TADDR *)p = arg;
+ p += 8;
+
+ *p++ = 0xC3; // ret
+
+ END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset)
+{
+ BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ?
15 : 11); + + *(UINT16 *)p = 0xA148; // mov rax, [XXXXXX] + p += 2; + *(TADDR *)p = arg; + p += 8; + + if (offset != 0) + { + // add rax, <offset> + *p++ = 0x48; + *p++ = 0x83; + *p++ = 0xC0; + *p++ = offset; + } + + *p++ = 0xC3; // ret + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(15); + +#ifdef UNIX_AMD64_ABI + *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX +#else + *(UINT16 *)p = 0xB849; // mov r8, XXXXXX +#endif + p += 2; + *(TADDR *)p = arg; + p += 8; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target) +{ + BEGIN_DYNAMIC_HELPER_EMIT(25); + +#ifdef UNIX_AMD64_ABI + *(UINT16 *)p = 0xBA48; // mov rdx, XXXXXX +#else + *(UINT16 *)p = 0xB849; // mov r8, XXXXXX +#endif + p += 2; + *(TADDR *)p = arg; + p += 8; + +#ifdef UNIX_AMD64_ABI + *(UINT16 *)p = 0xB948; // mov rcx, XXXXXX +#else + *(UINT16 *)p = 0xB949; // mov r9, XXXXXX +#endif + p += 2; + *(TADDR *)p = arg2; + p += 8; + + *p++ = X86_INSTR_JMP_REL32; // jmp rel32 + *(INT32 *)p = rel32UsingJumpStub((INT32 *)p, target, NULL, pAllocator); + p += 4; + + END_DYNAMIC_HELPER_EMIT(); +} + +PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule) +{ + STANDARD_VM_CONTRACT; + + PCODE helperAddress = (pLookup->helper == CORINFO_HELP_RUNTIMEHANDLE_METHOD ? + GetEEFuncEntryPoint(JIT_GenericHandleMethodWithSlotAndModule) : + GetEEFuncEntryPoint(JIT_GenericHandleClassWithSlotAndModule)); + + GenericHandleArgs * pArgs = (GenericHandleArgs *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(sizeof(GenericHandleArgs), DYNAMIC_HELPER_ALIGNMENT); + pArgs->dictionaryIndexAndSlot = dictionaryIndexAndSlot; + pArgs->signature = pLookup->signature; + pArgs->module = (CORINFO_MODULE_HANDLE)pModule; + + // It's available only via the run-time helper function + if (pLookup->indirections == CORINFO_USEHELPER) + { + BEGIN_DYNAMIC_HELPER_EMIT(15); + + // rcx/rdi contains the generic context parameter + // mov rdx/rsi,pArgs + // jmp helperAddress + EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress); + + END_DYNAMIC_HELPER_EMIT(); + } + else + { + int indirectionsSize = 0; + for (WORD i = 0; i < pLookup->indirections; i++) + indirectionsSize += (pLookup->offsets[i] >= 0x80 ? 7 : 4); + + int codeSize = indirectionsSize + (pLookup->testForNull ? 30 : 4); + + BEGIN_DYNAMIC_HELPER_EMIT(codeSize); + + if (pLookup->testForNull) + { + // rcx/rdi contains the generic context parameter. 
Save a copy of it in the rax register.
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00f88948; p += 3; // mov rax,rdi
+#else
+ *(UINT32*)p = 0x00c88948; p += 3; // mov rax,rcx
+#endif
+ }
+
+ for (WORD i = 0; i < pLookup->indirections; i++)
+ {
+#ifdef UNIX_AMD64_ABI
+ // mov rdi,qword ptr [rdi+offset]
+ if (pLookup->offsets[i] >= 0x80)
+ {
+ *(UINT32*)p = 0x00bf8b48; p += 3;
+ *(UINT32*)p = (UINT32)pLookup->offsets[i]; p += 4;
+ }
+ else
+ {
+ *(UINT32*)p = 0x007f8b48; p += 3;
+ *p++ = (BYTE)pLookup->offsets[i];
+ }
+#else
+ // mov rcx,qword ptr [rcx+offset]
+ if (pLookup->offsets[i] >= 0x80)
+ {
+ *(UINT32*)p = 0x00898b48; p += 3;
+ *(UINT32*)p = (UINT32)pLookup->offsets[i]; p += 4;
+ }
+ else
+ {
+ *(UINT32*)p = 0x00498b48; p += 3;
+ *p++ = (BYTE)pLookup->offsets[i];
+ }
+#endif
+ }
+
+ // No null test required
+ if (!pLookup->testForNull)
+ {
+ // No fixups needed for R2R
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00f88948; p += 3; // mov rax,rdi
+#else
+ *(UINT32*)p = 0x00c88948; p += 3; // mov rax,rcx
+#endif
+ *p++ = 0xC3; // ret
+ }
+ else
+ {
+ // rcx/rdi contains the value of the dictionary slot entry
+
+ _ASSERTE(pLookup->indirections != 0);
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00ff8548; p += 3; // test rdi,rdi
+#else
+ *(UINT32*)p = 0x00c98548; p += 3; // test rcx,rcx
+#endif
+
+ // je 'HELPER_CALL' (a jump of 4 bytes)
+ *(UINT16*)p = 0x0474; p += 2;
+
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00f88948; p += 3; // mov rax,rdi
+#else
+ *(UINT32*)p = 0x00c88948; p += 3; // mov rax,rcx
+#endif
+ *p++ = 0xC3; // ret
+
+ // 'HELPER_CALL'
+ {
+ // Put the generic context back into rcx/rdi (it was previously saved in rax)
+#ifdef UNIX_AMD64_ABI
+ *(UINT32*)p = 0x00c78948; p += 3; // mov rdi,rax
+#else
+ *(UINT32*)p = 0x00c18948; p += 3; // mov rcx,rax
+#endif
+
+ // mov rdx,pArgs
+ // jmp helperAddress
+ EmitHelperWithArg(p, pAllocator, (TADDR)pArgs, helperAddress);
+ }
+ }
+
+ END_DYNAMIC_HELPER_EMIT();
+ }
+}
+
+#endif // FEATURE_READYTORUN
+
+#endif // DACCESS_COMPILE
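As a companion to emitJump, isJumpRel64, and decodeJump64 above, a minimal standalone sketch of the fixed 12-byte thunk shape they agree on (48 B8 imm64 / FF E0); the names are illustrative, and the runtime's PCODE/PTR types are replaced with plain pointers:

    #include <cstdint>
    #include <cstring>

    // mov rax, imm64 (48 B8 <imm64>) followed by jmp rax (FF E0): 12 bytes total.
    void EmitJumpRax(uint8_t* buf, uint64_t target)
    {
        buf[0] = 0x48;                             // REX.W prefix
        buf[1] = 0xB8;                             // mov rax, imm64
        memcpy(buf + 2, &target, sizeof(target));  // the 8-byte absolute target
        buf[10] = 0xFF;                            // jmp rax
        buf[11] = 0xE0;
    }

    // Recognize the thunk shape by its fixed opcode bytes.
    bool IsJumpRax(const uint8_t* buf)
    {
        return buf[0] == 0x48 && buf[1] == 0xB8 && buf[10] == 0xFF && buf[11] == 0xE0;
    }

    // Recover the absolute target from the imm64 at offset 2.
    uint64_t DecodeJumpRax(const uint8_t* buf)
    {
        uint64_t target;
        memcpy(&target, buf + 2, sizeof(target));
        return target;
    }

A thunk written by EmitJumpRax round-trips through DecodeJumpRax, mirroring how emitJump pairs with decodeJump64 in the file above; since the target travels as a full imm64, this form reaches anywhere in the address space, unlike the rel32 form, which is why rel32UsingJumpStub falls back to such stubs when a target is out of ±2GB range.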