Diffstat (limited to 'src/vm/arm/stubs.cpp')
 src/vm/arm/stubs.cpp (new, -rw-r--r--) | 3948 +++++++++++++++++++++++++++++++
 1 file changed, 3948 insertions(+), 0 deletions(-)
diff --git a/src/vm/arm/stubs.cpp b/src/vm/arm/stubs.cpp
new file mode 100644
index 0000000000..0b069da47e
--- /dev/null
+++ b/src/vm/arm/stubs.cpp
@@ -0,0 +1,3948 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+// File: stubs.cpp
+//
+// This file contains stub functions for unimplemented features needed to
+// run on the ARM platform.
+
+#include "common.h"
+#include "jitinterface.h"
+#include "comdelegate.h"
+#include "invokeutil.h"
+#include "excep.h"
+#include "class.h"
+#include "field.h"
+#include "dllimportcallback.h"
+#include "dllimport.h"
+#ifdef FEATURE_REMOTING
+#include "remoting.h"
+#endif
+#include "eeconfig.h"
+#include "cgensys.h"
+#include "asmconstants.h"
+#include "security.h"
+#include "securitydescriptor.h"
+#include "virtualcallstub.h"
+#include "gcdump.h"
+#include "rtlfunctions.h"
+#include "codeman.h"
+#include "tls.h"
+#include "ecall.h"
+#include "threadsuspend.h"
+
+// target write barriers
+EXTERN_C void JIT_WriteBarrier(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_End();
+EXTERN_C void JIT_CheckedWriteBarrier(Object **dst, Object *ref);
+EXTERN_C void JIT_CheckedWriteBarrier_End();
+EXTERN_C void JIT_ByRefWriteBarrier_End();
+EXTERN_C void JIT_ByRefWriteBarrier_SP(Object **dst, Object *ref);
+
+// source write barriers
+EXTERN_C void JIT_WriteBarrier_SP_Pre(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_SP_Pre_End();
+EXTERN_C void JIT_WriteBarrier_SP_Post(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_SP_Post_End();
+EXTERN_C void JIT_WriteBarrier_MP_Pre(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_MP_Pre_End();
+EXTERN_C void JIT_WriteBarrier_MP_Post(Object **dst, Object *ref);
+EXTERN_C void JIT_WriteBarrier_MP_Post_End();
+
+EXTERN_C void JIT_CheckedWriteBarrier_SP_Pre(Object **dst, Object *ref);
+EXTERN_C void JIT_CheckedWriteBarrier_SP_Pre_End();
+EXTERN_C void JIT_CheckedWriteBarrier_SP_Post(Object **dst, Object *ref);
+EXTERN_C void JIT_CheckedWriteBarrier_SP_Post_End();
+EXTERN_C void JIT_CheckedWriteBarrier_MP_Pre(Object **dst, Object *ref);
+EXTERN_C void JIT_CheckedWriteBarrier_MP_Pre_End();
+EXTERN_C void JIT_CheckedWriteBarrier_MP_Post(Object **dst, Object *ref);
+EXTERN_C void JIT_CheckedWriteBarrier_MP_Post_End();
+
+EXTERN_C void JIT_ByRefWriteBarrier_SP_Pre();
+EXTERN_C void JIT_ByRefWriteBarrier_SP_Pre_End();
+EXTERN_C void JIT_ByRefWriteBarrier_SP_Post();
+EXTERN_C void JIT_ByRefWriteBarrier_SP_Post_End();
+EXTERN_C void JIT_ByRefWriteBarrier_MP_Pre();
+EXTERN_C void JIT_ByRefWriteBarrier_MP_Pre_End();
+EXTERN_C void JIT_ByRefWriteBarrier_MP_Post(Object **dst, Object *ref);
+EXTERN_C void JIT_ByRefWriteBarrier_MP_Post_End();
+
+EXTERN_C void JIT_PatchedWriteBarrierStart();
+EXTERN_C void JIT_PatchedWriteBarrierLast();
+
+#ifndef DACCESS_COMPILE
+//-----------------------------------------------------------------------
+// InstructionFormat for conditional jump.
+//-----------------------------------------------------------------------
+class ThumbCondJump : public InstructionFormat
+{
+    public:
+        ThumbCondJump() : InstructionFormat(InstructionFormat::k16)
+        {
+            LIMITED_METHOD_CONTRACT;
+        }
+
+        virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            _ASSERTE(refsize == InstructionFormat::k16);
+
+            return 2;
+        }
+
+        virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            _ASSERTE(refsize == InstructionFormat::k16);
+
+            return 4;
+        }
+
+        //CB{N}Z Rn, <Label>
+        //Encoding 1|0|1|1|op|0|i|1|imm5|Rn
+        //op = Bit3(variation)
+        //Rn = Bits2-0(variation)
+        virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT variationCode, BYTE *pDataBuffer)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            _ASSERTE(refsize == InstructionFormat::k16);
+
+            if(fixedUpReference < 0 || fixedUpReference > 126)
+                COMPlusThrow(kNotSupportedException);
+
+            _ASSERTE((fixedUpReference & 0x1) == 0);
+
+            pOutBuffer[0] = static_cast<BYTE>(((0x3e & fixedUpReference) << 2) | (0x7 & variationCode));
+            pOutBuffer[1] = static_cast<BYTE>(0xb1 | (0x8 & variationCode) | ((0x40 & fixedUpReference) >> 5));
+        }
+};
+
+//-----------------------------------------------------------------------
+// InstructionFormat for near Jump and short Jump
+//-----------------------------------------------------------------------
+class ThumbNearJump : public InstructionFormat
+{
+    public:
+        ThumbNearJump() : InstructionFormat(InstructionFormat::k16|InstructionFormat::k32)
+        {
+            LIMITED_METHOD_CONTRACT;
+        }
+
+        virtual UINT GetSizeOfInstruction(UINT refsize, UINT variationCode)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            if(refsize == InstructionFormat::k16)
+                return 2;
+            else if(refsize == InstructionFormat::k32)
+                return 4;
+            else
+                _ASSERTE(!"Unknown refsize");
+            return 0;
+        }
+
+        virtual VOID EmitInstruction(UINT refsize, __int64 fixedUpReference, BYTE *pOutBuffer, UINT cond, BYTE *pDataBuffer)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            _ASSERTE(cond < 15);
+
+            //offsets must be in multiples of 2
+            _ASSERTE((fixedUpReference & 0x1) == 0);
+
+            if(cond == 0xe) //Always execute
+            {
+                if(fixedUpReference >= -2048 && fixedUpReference <= 2046)
+                {
+                    if(refsize != InstructionFormat::k16)
+                        _ASSERTE(!"Expected refSize to be 2");
+
+                    //Emit T2 encoding of B<c> <label> instruction
+                    pOutBuffer[0] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
+                    pOutBuffer[1] = static_cast<BYTE>(0xe0 | ((fixedUpReference & 0xe00) >> 9));
+                }
+                else if(fixedUpReference >= -16777216 && fixedUpReference <= 16777214)
+                {
+                    if(refsize != InstructionFormat::k32)
+                        _ASSERTE(!"Expected refSize to be 4");
+
+                    //Emit T4 encoding of B<c> <label> instruction
+                    int s  = (fixedUpReference & 0x1000000) >> 24;
+                    int i1 = (fixedUpReference & 0x800000) >> 23;
+                    int i2 = (fixedUpReference & 0x400000) >> 22;
+                    pOutBuffer[0] = static_cast<BYTE>((fixedUpReference & 0xff000) >> 12);
+                    pOutBuffer[1] = static_cast<BYTE>(0xf0 | (s << 2) | ((fixedUpReference & 0x300000) >> 20));
+                    pOutBuffer[2] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
+                    pOutBuffer[3] = static_cast<BYTE>(0x90 | (~(i1^s)) << 5 | (~(i2^s)) << 3 | (fixedUpReference & 0xe00) >> 9);
+                }
+                else
+                {
+                    COMPlusThrow(kNotSupportedException);
+                }
+            }
+            else // conditional branch based on flags
+            {
+                if(fixedUpReference >= -256 && fixedUpReference <= 254)
+                {
+                    if(refsize != InstructionFormat::k16)
+                        _ASSERTE(!"Expected refSize to be 2");
+
+                    //Emit T1 encoding of B<c> <label> instruction
+                    pOutBuffer[0] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
+                    pOutBuffer[1] = static_cast<BYTE>(0xd0 | (cond & 0xf));
+                }
+                else if(fixedUpReference >= -1048576 && fixedUpReference <= 1048574)
+                {
+                    if(refsize != InstructionFormat::k32)
+                        _ASSERTE(!"Expected refSize to be 4");
+
+                    //Emit T3 encoding of B<c> <label> instruction
+                    pOutBuffer[0] = static_cast<BYTE>(((cond & 0x3) << 6) | ((fixedUpReference & 0x3f000) >> 12));
+                    pOutBuffer[1] = static_cast<BYTE>(0xf0 | ((fixedUpReference & 0x100000) >> 18) | ((cond & 0xc) >> 2));
+                    pOutBuffer[2] = static_cast<BYTE>((fixedUpReference & 0x1fe) >> 1);
+                    pOutBuffer[3] = static_cast<BYTE>(0x80 | ((fixedUpReference & 0x40000) >> 13) | ((fixedUpReference & 0x80000) >> 16) | ((fixedUpReference & 0xe00) >> 9));
+                }
+                else
+                {
+                    COMPlusThrow(kNotSupportedException);
+                }
+            }
+        }
+
+        virtual BOOL CanReach(UINT refsize, UINT variationCode, BOOL fExternal, INT_PTR offset)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            if (fExternal)
+            {
+                _ASSERTE(0);
+                return FALSE;
+            }
+            else
+            {
+                switch (refsize)
+                {
+                case InstructionFormat::k16:
+                    if(variationCode == 0xe)
+                        return (offset >= -2048 && offset <= 2046 && (offset & 0x1) == 0);
+                    else
+                        return (offset >= -256 && offset <= 254 && (offset & 0x1) == 0);
+                case InstructionFormat::k32:
+                    if(variationCode == 0xe)
+                        return ((offset >= -16777216) && (offset <= 16777214) && ((offset & 0x1) == 0));
+                    else
+                        return ((offset >= -1048576) && (offset <= 1048574) && ((offset & 0x1) == 0));
+                default:
+                    _ASSERTE(!"Unknown refsize");
+                    return FALSE;
+                }
+            }
+        }
+
+        virtual UINT GetHotSpotOffset(UINT refsize, UINT variationCode)
+        {
+            LIMITED_METHOD_CONTRACT
+
+            _ASSERTE(refsize == InstructionFormat::k16 || refsize == InstructionFormat::k32);
+
+            return 4;
+        }
+};
+
+
+//static conditional jump instruction format object
+static BYTE gThumbCondJump[sizeof(ThumbCondJump)];
+
+//static near jump instruction format object
+static BYTE gThumbNearJump[sizeof(ThumbNearJump)];
+
+void StubLinkerCPU::Init(void)
+{
+    //Initialize the object
+    new (gThumbCondJump) ThumbCondJump();
+    new (gThumbNearJump) ThumbNearJump();
+}
+
+#ifndef CROSSGEN_COMPILE
+
+// GC write barrier support.
+//
+// To optimize our write barriers we code the values of several GC globals (e.g. g_lowest_address) directly
+// into the barrier function itself, thus avoiding a double memory indirection. Every time the GC modifies one
+// of these globals we need to update all of the write barriers accordingly.
+//
+// In order to keep this process non-brittle we don't hard code the offsets of the instructions that need to
+// be changed. Instead the code used to create these barriers is implemented using special macros that record
+// the necessary offsets in a descriptor table. Search for "GC write barrier support" in vm\arm\asmhelpers.asm
+// for more details.
+
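(Editor's aside: the "movw,movt instruction pair" the descriptor below refers to packs each 16-bit half of the constant into imm4:i:imm3:imm8 fields of the Thumb-2 encoding. A minimal sketch of that packing, assuming a hypothetical helper name; the runtime's actual patching goes through PutThumb2Mov32, used further down:)

    // Sketch only: encode "movw reg, #low16 / movt reg, #high16" at p[0..3].
    static void EncodeThumb2Mov32(UINT16 * p, UINT32 value, UINT16 reg)
    {
        UINT16 imm16 = (UINT16)value;                   // movw reg, #(value & 0xffff)
        p[0] = (UINT16)(0xf240 | ((imm16 >> 12) & 0xf) | (((imm16 >> 11) & 0x1) << 10));
        p[1] = (UINT16)((((imm16 >> 8) & 0x7) << 12) | (reg << 8) | (imm16 & 0xff));

        imm16 = (UINT16)(value >> 16);                  // movt reg, #(value >> 16)
        p[2] = (UINT16)(0xf2c0 | ((imm16 >> 12) & 0xf) | (((imm16 >> 11) & 0x1) << 10));
        p[3] = (UINT16)((((imm16 >> 8) & 0x7) << 12) | (reg << 8) | (imm16 & 0xff));
    }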
+// Structure describing the layout of a single write barrier descriptor. This must be kept in sync with the
+// code in vm\arm\asmhelpers.asm in the WRITE_BARRIER_END macro. Each offset recorded is for one of the
+// supported GC globals (an offset of 0xffff is encoded if that global is not used by the particular barrier
+// function). We currently only support one usage of each global by any single barrier function. The offset is
+// the byte offset from the start of the function at which a movw,movt instruction pair is used to load the
+// value of the global into a register.
+struct WriteBarrierDescriptor
+{
+    BYTE *  m_pFuncStart;                   // Pointer to the start of the barrier function
+    BYTE *  m_pFuncEnd;                     // Pointer to the end of the barrier function
+    DWORD   m_dw_g_lowest_address_offset;   // Offset of the instruction reading g_lowest_address
+    DWORD   m_dw_g_highest_address_offset;  // Offset of the instruction reading g_highest_address
+    DWORD   m_dw_g_ephemeral_low_offset;    // Offset of the instruction reading g_ephemeral_low
+    DWORD   m_dw_g_ephemeral_high_offset;   // Offset of the instruction reading g_ephemeral_high
+    DWORD   m_dw_g_card_table_offset;       // Offset of the instruction reading g_card_table
+};
+
+// Infrastructure used for mapping of the source and destination of current WB patching
+struct WriteBarrierMapping
+{
+    PBYTE to;    // Pointer to the write-barrier where it was copied over
+    PBYTE from;  // Pointer to write-barrier from which it was copied
+};
+
+const int WriteBarrierIndex        = 0;
+const int CheckedWriteBarrierIndex = 1;
+const int ByRefWriteBarrierIndex   = 2;
+const int MaxWriteBarrierIndex     = 3;
+
+WriteBarrierMapping wbMapping[MaxWriteBarrierIndex] =
+    {
+        {(PBYTE)JIT_WriteBarrier, NULL},
+        {(PBYTE)JIT_CheckedWriteBarrier, NULL},
+        {(PBYTE)JIT_ByRefWriteBarrier, NULL}
+    };
+
+PBYTE FindWBMapping(PBYTE from)
+{
+    for(int i = 0; i < MaxWriteBarrierIndex; ++i)
+    {
+        if(wbMapping[i].from == from)
+            return wbMapping[i].to;
+    }
+    return NULL;
+}
+
+// Pointer to the start of the descriptor table. The end of the table is marked by a sentinel entry
+// (m_pFuncStart is NULL).
+EXTERN_C WriteBarrierDescriptor g_rgWriteBarrierDescriptors;
+
+// Determine the range of memory containing all the write barrier implementations (these are clustered
+// together and should fit in a page or maybe two).
+void ComputeWriteBarrierRange(BYTE ** ppbStart, DWORD * pcbLength)
+{
+    DWORD size = (PBYTE)JIT_PatchedWriteBarrierLast - (PBYTE)JIT_PatchedWriteBarrierStart;
+    *ppbStart = (PBYTE)JIT_PatchedWriteBarrierStart;
+    *pcbLength = size;
+}
+
+void CopyWriteBarrier(PCODE dstCode, PCODE srcCode, PCODE endCode)
+{
+    TADDR dst = PCODEToPINSTR(dstCode);
+    TADDR src = PCODEToPINSTR(srcCode);
+    TADDR end = PCODEToPINSTR(endCode);
+
+    size_t size = (PBYTE)end - (PBYTE)src;
+    memcpy((PVOID)dst, (PVOID)src, size);
+}
+
+#if _DEBUG
+void ValidateWriteBarriers()
+{
+    // Post-grow write barriers are bigger than pre-grow ones, so validate that the target WB has space to
+    // accommodate them.
+    _ASSERTE( ((PBYTE)JIT_WriteBarrier_End - (PBYTE)JIT_WriteBarrier) >= ((PBYTE)JIT_WriteBarrier_MP_Post_End - (PBYTE)JIT_WriteBarrier_MP_Post));
+    _ASSERTE( ((PBYTE)JIT_WriteBarrier_End - (PBYTE)JIT_WriteBarrier) >= ((PBYTE)JIT_WriteBarrier_SP_Post_End - (PBYTE)JIT_WriteBarrier_SP_Post));
+
+    _ASSERTE( ((PBYTE)JIT_CheckedWriteBarrier_End - (PBYTE)JIT_CheckedWriteBarrier) >= ((PBYTE)JIT_CheckedWriteBarrier_MP_Post_End - (PBYTE)JIT_CheckedWriteBarrier_MP_Post));
+    _ASSERTE( ((PBYTE)JIT_CheckedWriteBarrier_End - (PBYTE)JIT_CheckedWriteBarrier) >= ((PBYTE)JIT_CheckedWriteBarrier_SP_Post_End - (PBYTE)JIT_CheckedWriteBarrier_SP_Post));
+
+    _ASSERTE( ((PBYTE)JIT_ByRefWriteBarrier_End - (PBYTE)JIT_ByRefWriteBarrier) >= ((PBYTE)JIT_ByRefWriteBarrier_MP_Post_End - (PBYTE)JIT_ByRefWriteBarrier_MP_Post));
+    _ASSERTE( ((PBYTE)JIT_ByRefWriteBarrier_End - (PBYTE)JIT_ByRefWriteBarrier) >= ((PBYTE)JIT_ByRefWriteBarrier_SP_Post_End - (PBYTE)JIT_ByRefWriteBarrier_SP_Post));
+
+}
+#endif // _DEBUG
+
+#define UPDATE_WB(_proc,_grow)   \
+    CopyWriteBarrier((PCODE)JIT_WriteBarrier, (PCODE)JIT_WriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_WriteBarrier_ ## _proc ## _ ## _grow ## _End); \
+    wbMapping[WriteBarrierIndex].from = (PBYTE)JIT_WriteBarrier_ ## _proc ## _ ## _grow ; \
+    \
+    CopyWriteBarrier((PCODE)JIT_CheckedWriteBarrier, (PCODE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow ## _End); \
+    wbMapping[CheckedWriteBarrierIndex].from = (PBYTE)JIT_CheckedWriteBarrier_ ## _proc ## _ ## _grow ; \
+    \
+    CopyWriteBarrier((PCODE)JIT_ByRefWriteBarrier, (PCODE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow , (PCODE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow ## _End); \
+    wbMapping[ByRefWriteBarrierIndex].from = (PBYTE)JIT_ByRefWriteBarrier_ ## _proc ## _ ## _grow ; \
+
+// Update the instructions in our various write barrier implementations that refer directly to the values
+// of GC globals such as g_lowest_address and g_card_table. We don't particularly care which values have
+// changed on each of these callbacks, it's pretty cheap to refresh them all.
+void UpdateGCWriteBarriers(bool postGrow = false)
+{
+    // Define a helper macro that abstracts the minutia of patching the instructions to access the value of a
+    // particular GC global.
+
+#if _DEBUG
+    ValidateWriteBarriers();
+#endif // _DEBUG
+
+    static bool wbCopyRequired = true; // We begin with a wb copy
+    static bool wbIsPostGrow = false;  // We begin with pre-Grow write barrier
+
+    if(postGrow && !wbIsPostGrow)
+    {
+        wbIsPostGrow = true;
+        wbCopyRequired = true;
+    }
+
+    if(wbCopyRequired)
+    {
+        BOOL mp = g_SystemInfo.dwNumberOfProcessors > 1;
+        if(mp)
+        {
+            if(wbIsPostGrow)
+            {
+                UPDATE_WB(MP,Post);
+            }
+            else
+            {
+                UPDATE_WB(MP,Pre);
+            }
+        }
+        else
+        {
+            if(wbIsPostGrow)
+            {
+                UPDATE_WB(SP,Post);
+            }
+            else
+            {
+                UPDATE_WB(SP,Pre);
+            }
+        }
+
+        wbCopyRequired = false;
+    }
+
+#define GWB_PATCH_OFFSET(_global) \
+    if (pDesc->m_dw_##_global##_offset != 0xffff) \
+        PutThumb2Mov32((UINT16*)(to + pDesc->m_dw_##_global##_offset - 1), (UINT32)(dac_cast<TADDR>(_global)));
+
+    // Iterate through the write barrier patch table created in the .clrwb section
+    // (see write barrier asm code)
+    WriteBarrierDescriptor * pDesc = &g_rgWriteBarrierDescriptors;
+    while (pDesc->m_pFuncStart)
+    {
+        // If the write barrier is being currently used (as in copied over to the patchable site)
+        // then read the patch location from the table and use the offset to patch the target asm code
+        PBYTE to = FindWBMapping(pDesc->m_pFuncStart);
+        if(to)
+        {
+            GWB_PATCH_OFFSET(g_lowest_address);
+            GWB_PATCH_OFFSET(g_highest_address);
+            GWB_PATCH_OFFSET(g_ephemeral_low);
+            GWB_PATCH_OFFSET(g_ephemeral_high);
+            GWB_PATCH_OFFSET(g_card_table);
+        }
+
+        pDesc++;
+    }
+
+    // We've changed code so we must flush the instruction cache.
+    BYTE *pbAlteredRange;
+    DWORD cbAlteredRange;
+    ComputeWriteBarrierRange(&pbAlteredRange, &cbAlteredRange);
+    FlushInstructionCache(GetCurrentProcess(), pbAlteredRange, cbAlteredRange);
+}
+
+void StompWriteBarrierResize(bool isRuntimeSuspended, bool bReqUpperBoundsCheck)
+{
+    // The runtime is not always suspended when this is called (unlike StompWriteBarrierEphemeral) but we have
+    // no way to update the barrier code atomically on ARM since each 32-bit value we change is loaded over
+    // two instructions. So we have to suspend the EE (which forces code out of the barrier functions) before
+    // proceeding. Luckily the case where the runtime is not already suspended is relatively rare (allocation
+    // of a new large object heap segment). Skip the suspend for the case where we're called during runtime
+    // startup.
+
+    // Suspending/resuming the EE under GC stress will trigger a GC, and if we're holding the
+    // GC lock due to allocating a LOH segment it will cause a deadlock, so disable it here.
+    GCStressPolicy::InhibitHolder iholder;
+
+    bool fSuspended = false;
+    if (!isRuntimeSuspended)
+    {
+        ThreadSuspend::SuspendEE(ThreadSuspend::SUSPEND_OTHER);
+        fSuspended = true;
+    }
+
+    UpdateGCWriteBarriers(bReqUpperBoundsCheck);
+
+    if (fSuspended)
+        ThreadSuspend::RestartEE(FALSE, TRUE);
+}
+
+void StompWriteBarrierEphemeral(bool isRuntimeSuspended)
+{
+    UNREFERENCED_PARAMETER(isRuntimeSuspended);
+    _ASSERTE(isRuntimeSuspended);
+    UpdateGCWriteBarriers();
+}
+#endif // CROSSGEN_COMPILE
+
+#endif // !DACCESS_COMPILE
+
+#ifndef CROSSGEN_COMPILE
+void LazyMachState::unwindLazyState(LazyMachState* baseState,
+                                    MachState* unwoundstate,
+                                    DWORD threadId,
+                                    int funCallDepth,
+                                    HostCallPreference hostCallPreference)
+{
+    T_CONTEXT                         ctx;
+    T_KNONVOLATILE_CONTEXT_POINTERS   nonVolRegPtrs;
+
+    ctx.Pc = baseState->captureIp;
+    ctx.Sp = baseState->captureSp;
+
+    ctx.R4 = unwoundstate->captureR4_R11[0] = baseState->captureR4_R11[0];
+    ctx.R5 = unwoundstate->captureR4_R11[1] = baseState->captureR4_R11[1];
+    ctx.R6 = unwoundstate->captureR4_R11[2] = baseState->captureR4_R11[2];
+    ctx.R7 = unwoundstate->captureR4_R11[3] = baseState->captureR4_R11[3];
+    ctx.R8 = unwoundstate->captureR4_R11[4] = baseState->captureR4_R11[4];
+    ctx.R9 = unwoundstate->captureR4_R11[5] = baseState->captureR4_R11[5];
+    ctx.R10 = unwoundstate->captureR4_R11[6] = baseState->captureR4_R11[6];
+    ctx.R11 = unwoundstate->captureR4_R11[7] = baseState->captureR4_R11[7];
+
+#if !defined(DACCESS_COMPILE)
+    // For DAC, if we get here, it means that the LazyMachState is uninitialized and we have to unwind it.
+    // The API we use to unwind in DAC is StackWalk64(), which does not support the context pointers.
+    //
+    // Restore the integer registers to KNONVOLATILE_CONTEXT_POINTERS to be used for unwinding.
+    nonVolRegPtrs.R4 = &unwoundstate->captureR4_R11[0];
+    nonVolRegPtrs.R5 = &unwoundstate->captureR4_R11[1];
+    nonVolRegPtrs.R6 = &unwoundstate->captureR4_R11[2];
+    nonVolRegPtrs.R7 = &unwoundstate->captureR4_R11[3];
+    nonVolRegPtrs.R8 = &unwoundstate->captureR4_R11[4];
+    nonVolRegPtrs.R9 = &unwoundstate->captureR4_R11[5];
+    nonVolRegPtrs.R10 = &unwoundstate->captureR4_R11[6];
+    nonVolRegPtrs.R11 = &unwoundstate->captureR4_R11[7];
+#endif // DACCESS_COMPILE
+
+    LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    LazyMachState::unwindLazyState(ip:%p,sp:%p)\n", baseState->captureIp, baseState->captureSp));
+
+    PCODE pvControlPc;
+
+    do
+    {
+#ifndef FEATURE_PAL
+        pvControlPc = Thread::VirtualUnwindCallFrame(&ctx, &nonVolRegPtrs);
+#else // !FEATURE_PAL
+#ifdef DACCESS_COMPILE
+        HRESULT hr = DacVirtualUnwind(threadId, &ctx, &nonVolRegPtrs);
+        if (FAILED(hr))
+        {
+            DacError(hr);
+        }
+#else // DACCESS_COMPILE
+        BOOL success = PAL_VirtualUnwind(&ctx, &nonVolRegPtrs);
+        if (!success)
+        {
+            _ASSERTE(!"unwindLazyState: Unwinding failed");
+            EEPOLICY_HANDLE_FATAL_ERROR(COR_E_EXECUTIONENGINE);
+        }
+#endif // DACCESS_COMPILE
+        pvControlPc = GetIP(&ctx);
+#endif // !FEATURE_PAL
+        if (funCallDepth > 0)
+        {
+            --funCallDepth;
+            if (funCallDepth == 0)
+                break;
+        }
+        else
+        {
+            // Determine whether the given IP resides in JITted code (IsManagedCode returns nonzero in that
+            // case). Use it now to see if we've unwound to managed code yet.
+            BOOL fFailedReaderLock = FALSE;
+            BOOL fIsManagedCode = ExecutionManager::IsManagedCode(pvControlPc, hostCallPreference, &fFailedReaderLock);
+            if (fFailedReaderLock)
+            {
+                // We don't know if we would have been able to find a JIT
+                // manager, because we couldn't enter the reader lock without
+                // yielding (and our caller doesn't want us to yield). So abort
+                // now.
+
+                // Invalidate the lazyState we're returning, so the caller knows
+                // we aborted before we could fully unwind
+                unwoundstate->_isValid = false;
+                return;
+            }
+
+            if (fIsManagedCode)
+                break;
+        }
+    }
+    while(TRUE);
+
+    //
+    // Update unwoundState so that HelperMethodFrameRestoreState knows which
+    // registers have been potentially modified.
+    //
+
+    unwoundstate->_pc = ctx.Pc;
+    unwoundstate->_sp = ctx.Sp;
+
+#ifdef DACCESS_COMPILE
+    // For DAC builds, we update the registers directly since we don't have context pointers
+    unwoundstate->captureR4_R11[0] = ctx.R4;
+    unwoundstate->captureR4_R11[1] = ctx.R5;
+    unwoundstate->captureR4_R11[2] = ctx.R6;
+    unwoundstate->captureR4_R11[3] = ctx.R7;
+    unwoundstate->captureR4_R11[4] = ctx.R8;
+    unwoundstate->captureR4_R11[5] = ctx.R9;
+    unwoundstate->captureR4_R11[6] = ctx.R10;
+    unwoundstate->captureR4_R11[7] = ctx.R11;
+#else // !DACCESS_COMPILE
+    // For non-DAC builds, update the register state from context pointers
+    unwoundstate->_R4_R11[0] = (PDWORD)nonVolRegPtrs.R4;
+    unwoundstate->_R4_R11[1] = (PDWORD)nonVolRegPtrs.R5;
+    unwoundstate->_R4_R11[2] = (PDWORD)nonVolRegPtrs.R6;
+    unwoundstate->_R4_R11[3] = (PDWORD)nonVolRegPtrs.R7;
+    unwoundstate->_R4_R11[4] = (PDWORD)nonVolRegPtrs.R8;
+    unwoundstate->_R4_R11[5] = (PDWORD)nonVolRegPtrs.R9;
+    unwoundstate->_R4_R11[6] = (PDWORD)nonVolRegPtrs.R10;
+    unwoundstate->_R4_R11[7] = (PDWORD)nonVolRegPtrs.R11;
+#endif // DACCESS_COMPILE
+
+    unwoundstate->_isValid = true;
+}
+
+void HelperMethodFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        SUPPORTS_DAC;
+    }
+    CONTRACTL_END;
+
+    pRD->IsCallerContextValid = FALSE;
+    pRD->IsCallerSPValid      = FALSE; // Don't add usage of this field. This is only temporary.
+
+    //
+    // Copy the saved state from the frame to the current context.
+    //
+
+    LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK    HelperMethodFrame::UpdateRegDisplay cached ip:%p, sp:%p\n", m_MachState._pc, m_MachState._sp));
+
+#if defined(DACCESS_COMPILE)
+    // For DAC, we may get here when the HMF is still uninitialized.
+    // So we may need to unwind here.
+    if (!m_MachState.isValid())
+    {
+        // This allocation throws on OOM.
+        MachState* pUnwoundState = (MachState*)DacAllocHostOnlyInstance(sizeof(*pUnwoundState), true);
+
+        InsureInit(false, pUnwoundState);
+
+        pRD->pCurrentContext->Pc = pRD->ControlPC = pUnwoundState->_pc;
+        pRD->pCurrentContext->Sp = pRD->SP        = pUnwoundState->_sp;
+
+        pRD->pCurrentContext->R4 = (DWORD)(pUnwoundState->captureR4_R11[0]);
+        pRD->pCurrentContext->R5 = (DWORD)(pUnwoundState->captureR4_R11[1]);
+        pRD->pCurrentContext->R6 = (DWORD)(pUnwoundState->captureR4_R11[2]);
+        pRD->pCurrentContext->R7 = (DWORD)(pUnwoundState->captureR4_R11[3]);
+        pRD->pCurrentContext->R8 = (DWORD)(pUnwoundState->captureR4_R11[4]);
+        pRD->pCurrentContext->R9 = (DWORD)(pUnwoundState->captureR4_R11[5]);
+        pRD->pCurrentContext->R10 = (DWORD)(pUnwoundState->captureR4_R11[6]);
+        pRD->pCurrentContext->R11 = (DWORD)(pUnwoundState->captureR4_R11[7]);
+
+        return;
+    }
+#endif // DACCESS_COMPILE
+
+    // reset pContext; it's only valid for active (top-most) frame
+    pRD->pContext = NULL;
+    pRD->ControlPC = GetReturnAddress();
+    pRD->SP = (DWORD)(size_t)m_MachState._sp;
+
+    pRD->pCurrentContext->Pc = pRD->ControlPC;
+    pRD->pCurrentContext->Sp = pRD->SP;
+
+    pRD->pCurrentContext->R4 = *m_MachState._R4_R11[0];
+    pRD->pCurrentContext->R5 = *m_MachState._R4_R11[1];
+    pRD->pCurrentContext->R6 = *m_MachState._R4_R11[2];
+    pRD->pCurrentContext->R7 = *m_MachState._R4_R11[3];
+    pRD->pCurrentContext->R8 = *m_MachState._R4_R11[4];
+    pRD->pCurrentContext->R9 = *m_MachState._R4_R11[5];
+    pRD->pCurrentContext->R10 = *m_MachState._R4_R11[6];
+    pRD->pCurrentContext->R11 = *m_MachState._R4_R11[7];
+
+    pRD->pCurrentContextPointers->R4 = m_MachState._R4_R11[0];
+    pRD->pCurrentContextPointers->R5 = m_MachState._R4_R11[1];
+    pRD->pCurrentContextPointers->R6 = m_MachState._R4_R11[2];
+    pRD->pCurrentContextPointers->R7 = m_MachState._R4_R11[3];
+    pRD->pCurrentContextPointers->R8 = m_MachState._R4_R11[4];
+    pRD->pCurrentContextPointers->R9 = m_MachState._R4_R11[5];
+    pRD->pCurrentContextPointers->R10 = m_MachState._R4_R11[6];
+    pRD->pCurrentContextPointers->R11 = m_MachState._R4_R11[7];
+    pRD->pCurrentContextPointers->Lr = NULL;
+}
+#endif // !CROSSGEN_COMPILE
+
+TADDR FixupPrecode::GetMethodDesc()
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // This lookup is also manually inlined in PrecodeFixupThunk assembly code
+    TADDR base = *PTR_TADDR(GetBase());
+    if (base == NULL)
+        return NULL;
+    return base + (m_MethodDescChunkIndex * MethodDesc::ALIGNMENT);
+}
+
+#ifdef DACCESS_COMPILE
+void FixupPrecode::EnumMemoryRegions(CLRDataEnumMemoryFlags flags)
+{
+    SUPPORTS_DAC;
+    DacEnumMemoryRegion(dac_cast<TADDR>(this), sizeof(FixupPrecode));
+
+    DacEnumMemoryRegion(GetBase(), sizeof(TADDR));
+}
+#endif // DACCESS_COMPILE
+
+#ifndef DACCESS_COMPILE
+
+void StubPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+    WRAPPER_NO_CONTRACT;
+
+    int n = 0;
+
+    m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #8]
+    m_rgCode[n++] = 0xc008;
+    m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0]
+    m_rgCode[n++] = 0xf000;
+
+    _ASSERTE(n == _countof(m_rgCode));
+
+    m_pTarget = GetPreStubEntryPoint();
+    m_pMethodDesc = (TADDR)pMD;
+}
+
+#ifdef FEATURE_NATIVE_IMAGE_GENERATION
+void StubPrecode::Fixup(DataImage *image)
+{
+    WRAPPER_NO_CONTRACT;
+
+    image->FixupFieldToNode(this, offsetof(StubPrecode, m_pTarget),
+                            image->GetHelperThunk(CORINFO_HELP_EE_PRESTUB),
+                            0,
+                            IMAGE_REL_BASED_PTR);
+
+    image->FixupField(this, offsetof(StubPrecode, m_pMethodDesc),
+                      (void*)GetMethodDesc(),
+                      0,
+                      IMAGE_REL_BASED_PTR);
+}
+#endif // FEATURE_NATIVE_IMAGE_GENERATION
+
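(Editor's aside: a hypothetical mirror of the layout the two StubPrecode loads above imply, not the declaration from cgencpu.h. With the Thumb PC reading as "address of the instruction + 4", the first ldr fetches the word at offset 12 into r12 and the second loads the word at offset 8 into pc:)

    struct StubPrecodeSketch
    {
        WORD  m_rgCode[4];    // +0x00: ldr r12, [pc, #8]  -> reads +0x0c (4 + 8)
                              // +0x04: ldr pc,  [pc, #0]  -> reads +0x08 (8 + 0)
        TADDR m_pTarget;      // +0x08: initially GetPreStubEntryPoint()
        TADDR m_pMethodDesc;  // +0x0c: left in r12 for the prestub to consume
    };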
+void NDirectImportPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+    WRAPPER_NO_CONTRACT;
+
+    int n = 0;
+
+    m_rgCode[n++] = 0xf8df; // ldr r12, [pc, #4]
+    m_rgCode[n++] = 0xc004;
+    m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #4]
+    m_rgCode[n++] = 0xf004;
+
+    _ASSERTE(n == _countof(m_rgCode));
+
+    m_pMethodDesc = (TADDR)pMD;
+    m_pTarget = GetEEFuncEntryPoint(NDirectImportThunk);
+}
+
+#ifdef FEATURE_NATIVE_IMAGE_GENERATION
+void NDirectImportPrecode::Fixup(DataImage *image)
+{
+    WRAPPER_NO_CONTRACT;
+
+    image->FixupField(this, offsetof(NDirectImportPrecode, m_pMethodDesc),
+                      (void*)GetMethodDesc(),
+                      0,
+                      IMAGE_REL_BASED_PTR);
+
+    image->FixupFieldToNode(this, offsetof(NDirectImportPrecode, m_pTarget),
+                            image->GetHelperThunk(CORINFO_HELP_EE_PINVOKE_FIXUP),
+                            0,
+                            IMAGE_REL_BASED_PTR);
+}
+#endif
+
+void FixupPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex /*=0*/, int iPrecodeChunkIndex /*=0*/)
+{
+    WRAPPER_NO_CONTRACT;
+
+    m_rgCode[0] = 0x46fc;   // mov r12, pc
+    m_rgCode[1] = 0xf8df;   // ldr pc, [pc, #4]
+    m_rgCode[2] = 0xf004;
+
+    // Initialize chunk indices only if they are not initialized yet. This is necessary to make MethodDesc::Reset work.
+    if (m_PrecodeChunkIndex == 0)
+    {
+        _ASSERTE(FitsInU1(iPrecodeChunkIndex));
+        m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
+    }
+
+    if (iMethodDescChunkIndex != -1)
+    {
+        if (m_MethodDescChunkIndex == 0)
+        {
+            _ASSERTE(FitsInU1(iMethodDescChunkIndex));
+            m_MethodDescChunkIndex = static_cast<BYTE>(iMethodDescChunkIndex);
+        }
+
+        if (*(void**)GetBase() == NULL)
+            *(void**)GetBase() = (BYTE*)pMD - (iMethodDescChunkIndex * MethodDesc::ALIGNMENT);
+    }
+
+    _ASSERTE(GetMethodDesc() == (TADDR)pMD);
+
+    if (pLoaderAllocator != NULL)
+    {
+        m_pTarget = GetEEFuncEntryPoint(PrecodeFixupThunk);
+    }
+}
+
+#ifdef FEATURE_NATIVE_IMAGE_GENERATION
+// Partial initialization. Used to save regrouped chunks.
+void FixupPrecode::InitForSave(int iPrecodeChunkIndex)
+{
+    STANDARD_VM_CONTRACT;
+
+    m_rgCode[0] = 0x46fc;   // mov r12, pc
+    m_rgCode[1] = 0xf8df;   // ldr pc, [pc, #4]
+    m_rgCode[2] = 0xf004;
+
+    _ASSERTE(FitsInU1(iPrecodeChunkIndex));
+    m_PrecodeChunkIndex = static_cast<BYTE>(iPrecodeChunkIndex);
+
+    // The rest is initialized in code:FixupPrecode::Fixup
+}
+
+void FixupPrecode::Fixup(DataImage *image, MethodDesc * pMD)
+{
+    STANDARD_VM_CONTRACT;
+
+    // Note that GetMethodDesc() does not return the correct value because of
+    // regrouping of MethodDescs into hot and cold blocks. That's why the caller
+    // has to supply the actual MethodDesc.
+
+    SSIZE_T mdChunkOffset;
+    ZapNode * pMDChunkNode = image->GetNodeForStructure(pMD, &mdChunkOffset);
+    ZapNode * pHelperThunk = image->GetHelperThunk(CORINFO_HELP_EE_PRECODE_FIXUP);
+
+    image->FixupFieldToNode(this, offsetof(FixupPrecode, m_pTarget), pHelperThunk);
+
+    // Set the actual chunk index
+    FixupPrecode * pNewPrecode = (FixupPrecode *)image->GetImagePointer(this);
+
+    size_t mdOffset   = mdChunkOffset - sizeof(MethodDescChunk);
+    size_t chunkIndex = mdOffset / MethodDesc::ALIGNMENT;
+    _ASSERTE(FitsInU1(chunkIndex));
+    pNewPrecode->m_MethodDescChunkIndex = (BYTE) chunkIndex;
+
+    // Fixup the base of MethodDescChunk
+    if (m_PrecodeChunkIndex == 0)
+    {
+        image->FixupFieldToNode(this, (BYTE *)GetBase() - (BYTE *)this,
+            pMDChunkNode, sizeof(MethodDescChunk));
+    }
+}
+#endif // FEATURE_NATIVE_IMAGE_GENERATION
+
+void ThisPtrRetBufPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+    WRAPPER_NO_CONTRACT;
+
+    int n = 0;
+
+    m_rgCode[n++] = 0x4684; // mov r12, r0
+    m_rgCode[n++] = 0x4608; // mov r0, r1
+    m_rgCode[n++] = 0xea4f; // mov r1, r12
+    m_rgCode[n++] = 0x010c;
+    m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #0]
+    m_rgCode[n++] = 0xf000;
+
+    _ASSERTE(n == _countof(m_rgCode));
+
+    m_pTarget = GetPreStubEntryPoint();
+    m_pMethodDesc = (TADDR)pMD;
+}
+
+
+#ifdef HAS_REMOTING_PRECODE
+
+void RemotingPrecode::Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator)
+{
+    WRAPPER_NO_CONTRACT;
+
+    int n = 0;
+
+    m_rgCode[n++] = 0xb502; // push {r1,lr}
+    m_rgCode[n++] = 0x4904; // ldr r1, [pc, #16]  ; =m_pPrecodeRemotingThunk
+    m_rgCode[n++] = 0x4788; // blx r1
+    m_rgCode[n++] = 0xe8bd; // pop {r1,lr}
+    m_rgCode[n++] = 0x4002;
+    m_rgCode[n++] = 0xf8df; // ldr pc, [pc, #12]  ; =m_pLocalTarget
+    m_rgCode[n++] = 0xf00c;
+    m_rgCode[n++] = 0xbf00; // nop                ; padding for alignment
+
+    _ASSERTE(n == _countof(m_rgCode));
+
+    m_pMethodDesc = (TADDR)pMD;
+    m_pPrecodeRemotingThunk = GetEEFuncEntryPoint(PrecodeRemotingThunk);
+    m_pLocalTarget = GetPreStubEntryPoint();
+}
+
+#ifdef FEATURE_NATIVE_IMAGE_GENERATION
+void RemotingPrecode::Fixup(DataImage *image, ZapNode *pCodeNode)
+{
+    WRAPPER_NO_CONTRACT;
+
+    if (pCodeNode)
+        image->FixupFieldToNode(this, offsetof(RemotingPrecode, m_pLocalTarget),
+                                pCodeNode,
+                                THUMB_CODE,
+                                IMAGE_REL_BASED_PTR);
+    else
+        image->FixupFieldToNode(this, offsetof(RemotingPrecode, m_pLocalTarget),
+                                image->GetHelperThunk(CORINFO_HELP_EE_PRESTUB),
+                                0,
+                                IMAGE_REL_BASED_PTR);
+
+    image->FixupFieldToNode(this, offsetof(RemotingPrecode, m_pPrecodeRemotingThunk),
+                            image->GetHelperThunk(CORINFO_HELP_EE_REMOTING_THUNK),
+                            0,
+                            IMAGE_REL_BASED_PTR);
+
+    image->FixupField(this, offsetof(RemotingPrecode, m_pMethodDesc),
+                      (void*)GetMethodDesc(),
+                      0,
+                      IMAGE_REL_BASED_PTR);
+}
+#endif // FEATURE_NATIVE_IMAGE_GENERATION
+
+void CTPMethodTable::ActivatePrecodeRemotingThunk()
+{
+    // Nothing to do for ARM version of remoting precode (we don't burn the TP MethodTable pointer into
+    // PrecodeRemotingThunk directly).
+}
+
+#endif // HAS_REMOTING_PRECODE
+
+
+#ifndef CROSSGEN_COMPILE
+/*
+Rough pseudo-code of interface dispatching:
+
+  // jitted code sets r0, r4:
+  r0 = object;
+  r4 = indirectionCell;
+  // jitted code calls *indirectionCell
+  switch (*indirectionCell)
+  {
+      case LookupHolder._stub:
+          // ResolveWorkerAsmStub:
+          *indirectionCell = DispatchHolder._stub;
+          call ResolveWorkerStatic, jump to target method;
+      case DispatchHolder._stub:
+          if (r0.methodTable == expectedMethodTable) jump to target method;
+          // ResolveHolder._stub._failEntryPoint:
+          jump to case ResolveHolder._stub._resolveEntryPoint;
+      case ResolveHolder._stub._resolveEntryPoint:
+          if (r0.methodTable in hashTable) jump to target method;
+          // ResolveHolder._stub._slowEntryPoint:
+          // ResolveWorkerChainLookupAsmStub:
+          // ResolveWorkerAsmStub:
+          if (_failEntryPoint called too many times) *indirectionCell = ResolveHolder._stub._resolveEntryPoint;
+          call ResolveWorkerStatic, jump to target method;
+  }
+
+Note that ResolveWorkerChainLookupAsmStub currently points directly
+to ResolveWorkerAsmStub; in the future, this could be separate.
+*/
+
+void LookupHolder::InitializeStatic()
+{
+    // Nothing to initialize
+}
+
+void LookupHolder::Initialize(PCODE resolveWorkerTarget, size_t dispatchToken)
+{
+    // Called directly by JITTED code
+    // See ResolveWorkerAsmStub
+
+    // ldr r12, [pc + 8]     ; #_token
+    _stub._entryPoint[0] = 0xf8df;
+    _stub._entryPoint[1] = 0xc008;
+    // ldr pc, [pc]          ; #_resolveWorkerTarget
+    _stub._entryPoint[2] = 0xf8df;
+    _stub._entryPoint[3] = 0xf000;
+
+    _stub._resolveWorkerTarget = resolveWorkerTarget;
+    _stub._token               = dispatchToken;
+    _ASSERTE(4 == LookupStub::entryPointLen);
+}
+
+void DispatchHolder::InitializeStatic()
+{
+    // Nothing to initialize
+};
+
+void DispatchHolder::Initialize(PCODE implTarget, PCODE failTarget, size_t expectedMT)
+{
+    // Called directly by JITTED code
+    // DispatchHolder._stub._entryPoint(r0:object, r1, r2, r3, r4:IndirectionCell)
+    // {
+    //     if (r0.methodTable == this._expectedMT) (this._implTarget)(r0, r1, r2, r3);
+    //     else (this._failTarget)(r0, r1, r2, r3, r4);
+    // }
+
+    int n = 0;
+    WORD offset;
+
+    // We rely on the stub entry-point being DWORD aligned (so we can tell whether any subsequent WORD is
+    // DWORD-aligned or not, which matters in the calculation of PC-relative offsets).
+    _ASSERTE(((UINT_PTR)_stub._entryPoint & 0x3) == 0);
+
+// Compute a PC-relative offset for use in an instruction encoding. Must call this prior to emitting the
+// instruction halfword to which it applies. For thumb-2 encodings the offset must be computed before emitting
+// the first of the halfwords.
+#undef PC_REL_OFFSET
+#define PC_REL_OFFSET(_field) (WORD)(offsetof(DispatchStub, _field) - (offsetof(DispatchStub, _entryPoint[n + 2]) & 0xfffffffc))
+
+    // r0 : object. It can be null as well.
+    // When it is null the code causes an AV, which is seen by the VM's personality routine
+    // and converted into a null reference exception. We want the AV to happen before modifying the stack so
+    // that we can get the call stack in windbg at the point of the AV. Therefore "ldr r12, [r0]" should be
+    // the first instruction.
+
+    // ldr r12, [r0 + #Object.m_pMethTab]
+    _stub._entryPoint[n++] = DISPATCH_STUB_FIRST_WORD;
+    _stub._entryPoint[n++] = 0xc000;
+
+    // push {r5}
+    _stub._entryPoint[n++] = 0xb420;
+
+    // ldr r5, [pc + #_expectedMT]
+    offset = PC_REL_OFFSET(_expectedMT);
+    _ASSERTE((offset & 0x3) == 0);
+    _stub._entryPoint[n++] = 0x4d00 | (offset >> 2);
+
+    // cmp r5, r12
+    _stub._entryPoint[n++] = 0x4565;
+
+    // pop {r5}
+    _stub._entryPoint[n++] = 0xbc20;
+
+    // bne failTarget
+    _stub._entryPoint[n++] = 0xd101;
+
+    // ldr pc, [pc + #_implTarget]
+    offset = PC_REL_OFFSET(_implTarget);
+    _stub._entryPoint[n++] = 0xf8df;
+    _stub._entryPoint[n++] = 0xf000 | offset;
+
+    // failTarget:
+    // ldr pc, [pc + #_failTarget]
+    offset = PC_REL_OFFSET(_failTarget);
+    _stub._entryPoint[n++] = 0xf8df;
+    _stub._entryPoint[n++] = 0xf000 | offset;
+
+    // nop - insert padding
+    _stub._entryPoint[n++] = 0xbf00;
+
+    _ASSERTE(n == DispatchStub::entryPointLen);
+
+    // Make sure that the data members below are aligned
+    _ASSERTE((n & 1) == 0);
+
+    _stub._expectedMT = DWORD(expectedMT);
+    _stub._failTarget = failTarget;
+    _stub._implTarget = implTarget;
+}
+
+void ResolveHolder::InitializeStatic()
+{
+}
+
+void ResolveHolder::Initialize(PCODE resolveWorkerTarget, PCODE patcherTarget,
+                               size_t dispatchToken, UINT32 hashedToken,
+                               void * cacheAddr, INT32 * counterAddr)
+{
+    // Called directly by JITTED code
+    // ResolveStub._resolveEntryPoint(r0:Object*, r1, r2, r3, r4:IndirectionCellAndFlags)
+    // {
+    //    MethodTable mt = r0.m_pMethTab;
+    //    int i = ((mt + mt >> 12) ^ this._hashedToken) & this._cacheMask
+    //    ResolveCacheElem e = this._cacheAddress + i
+    //    do
+    //    {
+    //        if (mt == e.pMT && this._token == e.token) (e.target)(r0, r1, r2, r3);
+    //        e = e.pNext;
+    //    } while (e != null)
+    //    (this._slowEntryPoint)(r0, r1, r2, r3, r4);
+    // }
+    //
+
+    int n = 0;
+    WORD offset;
+
+    // We rely on the stub entry-point being DWORD aligned (so we can tell whether any subsequent WORD is
+    // DWORD-aligned or not, which matters in the calculation of PC-relative offsets).
+    _ASSERTE(((UINT_PTR)_stub._resolveEntryPoint & 0x3) == 0);
+
+// Compute a PC-relative offset for use in an instruction encoding. Must call this prior to emitting the
+// instruction halfword to which it applies. For thumb-2 encodings the offset must be computed before emitting
+// the first of the halfwords.
+#undef PC_REL_OFFSET
+#define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _resolveEntryPoint[n + 2]) & 0xfffffffc))
+
+    // ldr r12, [r0 + #Object.m_pMethTab]
+    _stub._resolveEntryPoint[n++] = RESOLVE_STUB_FIRST_WORD;
+    _stub._resolveEntryPoint[n++] = 0xc000;
+
+    // ;; We need two scratch registers, r5 and r6
+    // push {r5,r6}
+    _stub._resolveEntryPoint[n++] = 0xb460;
+
+    // ;; Compute i = ((mt + mt >> 12) ^ this._hashedToken) & this._cacheMask
+
+    // add r6, r12, r12 lsr #12
+    _stub._resolveEntryPoint[n++] = 0xeb0c;
+    _stub._resolveEntryPoint[n++] = 0x361c;
+
+    // ldr r5, [pc + #_hashedToken]
+    offset = PC_REL_OFFSET(_hashedToken);
+    _ASSERTE((offset & 0x3) == 0);
+    _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
+
+    // eor r6, r6, r5
+    _stub._resolveEntryPoint[n++] = 0xea86;
+    _stub._resolveEntryPoint[n++] = 0x0605;
+
+    // ldr r5, [pc + #_cacheMask]
+    offset = PC_REL_OFFSET(_cacheMask);
+    _ASSERTE((offset & 0x3) == 0);
+    _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
+
+    // and r6, r6, r5
+    _stub._resolveEntryPoint[n++] = 0xea06;
+    _stub._resolveEntryPoint[n++] = 0x0605;
+
+    // ;; ResolveCacheElem e = this._cacheAddress + i
+    // ldr r5, [pc + #_cacheAddress]
+    offset = PC_REL_OFFSET(_cacheAddress);
+    _ASSERTE((offset & 0x3) == 0);
+    _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
+
+    // ldr r6, [r5 + r6] ;; r6 = e = this._cacheAddress + i
+    _stub._resolveEntryPoint[n++] = 0x59ae;
+
+    // ;; do {
+    int loop = n;
+
+    // ;; Check mt == e.pMT
+    // ldr r5, [r6 + #ResolveCacheElem.pMT]
+    offset = offsetof(ResolveCacheElem, pMT);
+    _ASSERTE(offset <= 124 && (offset & 0x3) == 0);
+    _stub._resolveEntryPoint[n++] = 0x6835 | (offset << 4);
+
+    // cmp r12, r5
+    _stub._resolveEntryPoint[n++] = 0x45ac;
+
+    // bne nextEntry
+    _stub._resolveEntryPoint[n++] = 0xd108;
+
+    // ;; Check this._token == e.token
+    // ldr r5, [pc + #_token]
+    offset = PC_REL_OFFSET(_token);
+    _ASSERTE((offset & 0x3) == 0);
+    _stub._resolveEntryPoint[n++] = 0x4d00 | (offset >> 2);
+
+    // ldr r12, [r6 + #ResolveCacheElem.token]
+    offset = offsetof(ResolveCacheElem, token);
+    _stub._resolveEntryPoint[n++] = 0xf8d6;
+    _stub._resolveEntryPoint[n++] = 0xc000 | offset;
+
+    // cmp r12, r5
+    _stub._resolveEntryPoint[n++] = 0x45ac;
+
+    // bne nextEntry
+    _stub._resolveEntryPoint[n++] = 0xd103;
+
+    // ldr r12, [r6 + #ResolveCacheElem.target] ;; r12 : e.target
+    offset = offsetof(ResolveCacheElem, target);
+    _stub._resolveEntryPoint[n++] = 0xf8d6;
+    _stub._resolveEntryPoint[n++] = 0xc000 | offset;
+
+    // ;; Restore r5 and r6
+    // pop {r5,r6}
+    _stub._resolveEntryPoint[n++] = 0xbc60;
+
+    // ;; Branch to e.target
+    // bx r12 ;; (e.target)(r0,r1,r2,r3)
+    _stub._resolveEntryPoint[n++] = 0x4760;
+
+    // nextEntry:
+    // ;; e = e.pNext;
+    // ldr r6, [r6 + #ResolveCacheElem.pNext]
+    offset = offsetof(ResolveCacheElem, pNext);
+    _ASSERTE(offset <= 124 && (offset & 0x3) == 0);
+    _stub._resolveEntryPoint[n++] = 0x6836 | (offset << 4);
+
+    // ;; } while(e != null);
+    // cbz r6, slowEntryPoint
+    _stub._resolveEntryPoint[n++] = 0xb116;
+
+    // ldr r12, [r0 + #Object.m_pMethTab]
+    _stub._resolveEntryPoint[n++] = 0xf8d0;
+    _stub._resolveEntryPoint[n++] = 0xc000;
+
+    // b loop
+    offset = (WORD)((loop - (n + 2)) * sizeof(WORD));
+    offset = (offset >> 1) & 0x07ff;
+    _stub._resolveEntryPoint[n++] = 0xe000 | offset;
+
+    // slowEntryPoint:
+    // pop {r5,r6}
+    _stub._resolveEntryPoint[n++] = 0xbc60;
+
+    // nop for alignment
+    _stub._resolveEntryPoint[n++] = 0xbf00;
+
+    // The slow entry point must be DWORD-aligned (see the _ASSERTE below); insert nops if necessary.
+
+    // ARMSTUB TODO: promotion
+
+    // fall through to slow case
+    _ASSERTE(_stub._resolveEntryPoint + n == _stub._slowEntryPoint);
+    _ASSERTE(n == ResolveStub::resolveEntryPointLen);
+
+    // ResolveStub._slowEntryPoint(r0:MethodToken, r1, r2, r3, r4:IndirectionCellAndFlags)
+    // {
+    //     r12 = this._tokenSlow;
+    //     this._resolveWorkerTarget(r0, r1, r2, r3, r4, r12);
+    // }
+
+    // The following macro relies on this entry point being DWORD-aligned. We've already asserted that the
+    // overall stub is aligned above, just need to check that the preceding stubs occupy an even number of
+    // WORD slots.
+    _ASSERTE((n & 1) == 0);
+
+#undef PC_REL_OFFSET
+#define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _slowEntryPoint[n + 2]) & 0xfffffffc))
+
+    n = 0;
+
+    // ldr r12, [pc + #_tokenSlow]
+    offset = PC_REL_OFFSET(_tokenSlow);
+    _stub._slowEntryPoint[n++] = 0xf8df;
+    _stub._slowEntryPoint[n++] = 0xc000 | offset;
+
+    // ldr pc, [pc + #_resolveWorkerTarget]
+    offset = PC_REL_OFFSET(_resolveWorkerTarget);
+    _stub._slowEntryPoint[n++] = 0xf8df;
+    _stub._slowEntryPoint[n++] = 0xf000 | offset;
+
+    _ASSERTE(n == ResolveStub::slowEntryPointLen);
+
+    // ResolveStub._failEntryPoint(r0:MethodToken, r1, r2, r3, r4:IndirectionCellAndFlags)
+    // {
+    //     if(--*(this._pCounter) < 0) r4 = r4 | SDF_ResolveBackPatch;
+    //     this._resolveEntryPoint(r0, r1, r2, r3, r4);
+    // }
+
+    // The following macro relies on this entry point being DWORD-aligned. We've already asserted that the
+    // overall stub is aligned above, just need to check that the preceding stubs occupy an even number of
+    // WORD slots.
+    _ASSERTE((n & 1) == 0);
+
+#undef PC_REL_OFFSET
+#define PC_REL_OFFSET(_field) (WORD)(offsetof(ResolveStub, _field) - (offsetof(ResolveStub, _failEntryPoint[n + 2]) & 0xfffffffc))
+
+    n = 0;
+
+    // push {r5}
+    _stub._failEntryPoint[n++] = 0xb420;
+
+    // ldr r5, [pc + #_pCounter]
+    offset = PC_REL_OFFSET(_pCounter);
+    _ASSERTE((offset & 0x3) == 0);
+    _stub._failEntryPoint[n++] = 0x4d00 | (offset >> 2);
+
+    // ldr r12, [r5]
+    _stub._failEntryPoint[n++] = 0xf8d5;
+    _stub._failEntryPoint[n++] = 0xc000;
+
+    // subs r12, r12, #1
+    _stub._failEntryPoint[n++] = 0xf1bc;
+    _stub._failEntryPoint[n++] = 0x0c01;
+
+    // str r12, [r5]
+    _stub._failEntryPoint[n++] = 0xf8c5;
+    _stub._failEntryPoint[n++] = 0xc000;
+
+    // pop {r5}
+    _stub._failEntryPoint[n++] = 0xbc20;
+
+    // bge resolveEntryPoint
+    _stub._failEntryPoint[n++] = 0xda01;
+
+    // orr r4, r4, #SDF_ResolveBackPatch
+    _ASSERTE(SDF_ResolveBackPatch < 256);
+    _stub._failEntryPoint[n++] = 0xf044;
+    _stub._failEntryPoint[n++] = 0x0400 | SDF_ResolveBackPatch;
+
+    // resolveEntryPoint:
+    // b _resolveEntryPoint
+    offset = (WORD)(offsetof(ResolveStub, _resolveEntryPoint) - offsetof(ResolveStub, _failEntryPoint[n + 2]));
+    _ASSERTE((offset & 1) == 0);
+    offset = (offset >> 1) & 0x07ff;
+    _stub._failEntryPoint[n++] = 0xe000 | offset;
+
+    // nop for alignment
+    _stub._failEntryPoint[n++] = 0xbf00;
+
+    _ASSERTE(n == ResolveStub::failEntryPointLen);
+
+    _stub._pCounter            = counterAddr;
+    _stub._hashedToken         = hashedToken << LOG2_PTRSIZE;
+    _stub._cacheAddress        = (size_t) cacheAddr;
+    _stub._token               = dispatchToken;
+    _stub._tokenSlow           = dispatchToken;
+    _stub._resolveWorkerTarget = resolveWorkerTarget;
+    _stub._cacheMask           = CALL_STUB_CACHE_MASK * sizeof(void*);
+
+    _ASSERTE(resolveWorkerTarget == (PCODE)ResolveWorkerChainLookupAsmStub);
+    _ASSERTE(patcherTarget == NULL);
+}
+
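(Editor's aside: read back as C, the probe loop those halfwords encode looks roughly like the sketch below. ProbeSketch is a hypothetical name; note _hashedToken and _cacheMask are pre-scaled to byte offsets by the assignments above, so i indexes bytes, not elements:)

    static ResolveCacheElem * ProbeSketch(TADDR mt, size_t token, BYTE * cacheAddress,
                                          UINT32 hashedToken, UINT32 cacheMask)
    {
        // i = ((mt + mt >> 12) ^ _hashedToken) & _cacheMask
        UINT32 i = (UINT32)((mt + (mt >> 12)) ^ hashedToken) & cacheMask;

        ResolveCacheElem * e = *(ResolveCacheElem **)(cacheAddress + i);
        while (e != NULL)
        {
            if ((TADDR)e->pMT == mt && e->token == token)
                return e;   // hit: the stub tail-calls e.target via "bx r12"
            e = e->pNext;
        }
        return NULL;        // miss: the stub falls through to _slowEntryPoint
    }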
+BOOL DoesSlotCallPrestub(PCODE pCode)
+{
+    PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(pCode));
+
+    // FixupPrecode
+    if (pInstr[0] == 0x46fc && // mov r12, pc
+        pInstr[1] == 0xf8df &&
+        pInstr[2] == 0xf004)
+    {
+        PCODE pTarget = dac_cast<PTR_FixupPrecode>(pInstr)->m_pTarget;
+
+        // Check for jump stub (NGen case)
+        if (isJump(pTarget))
+        {
+            pTarget = decodeJump(pTarget);
+        }
+
+        return pTarget == (TADDR)PrecodeFixupThunk;
+    }
+
+    // StubPrecode
+    if (pInstr[0] == 0xf8df && // ldr r12, [pc + 8]
+        pInstr[1] == 0xc008 &&
+        pInstr[2] == 0xf8df && // ldr pc, [pc]
+        pInstr[3] == 0xf000)
+    {
+        PCODE pTarget = dac_cast<PTR_StubPrecode>(pInstr)->m_pTarget;
+
+        // Check for jump stub (NGen case)
+        if (isJump(pTarget))
+        {
+            pTarget = decodeJump(pTarget);
+        }
+
+        return pTarget == GetPreStubEntryPoint();
+    }
+
+    return FALSE;
+}
+
+Stub *GenerateInitPInvokeFrameHelper()
+{
+    CONTRACT(Stub*)
+    {
+        THROWS;
+        GC_NOTRIGGER;
+        MODE_ANY;
+
+        POSTCONDITION(CheckPointer(RETVAL));
+    }
+    CONTRACT_END;
+
+    CPUSTUBLINKER sl;
+    CPUSTUBLINKER *psl = &sl;
+
+    CORINFO_EE_INFO::InlinedCallFrameInfo FrameInfo;
+    InlinedCallFrame::GetEEInfo(&FrameInfo);
+
+    // R4 contains address of the frame on stack (the frame ptr, not its neg space)
+    unsigned negSpace = FrameInfo.offsetOfFrameVptr;
+
+    ThumbReg regFrame   = ThumbReg(4);
+    ThumbReg regThread  = ThumbReg(5);
+    ThumbReg regScratch = ThumbReg(6);
+
+#ifdef FEATURE_IMPLICIT_TLS
+    TLSACCESSMODE mode = TLSACCESS_GENERIC;
+#else
+    TLSACCESSMODE mode = GetTLSAccessMode(GetThreadTLSIndex());
+#endif
+
+    if (mode == TLSACCESS_GENERIC)
+    {
+        // Erect frame to perform call to GetThread
+        psl->ThumbEmitProlog(1, sizeof(ArgumentRegisters), FALSE); // Save r4 for aligned stack
+
+        // Save argument registers around the GetThread call. Don't bother using ldm/stm since this
+        // is an inefficient path anyway.
+        for (int reg = 0; reg < 4; reg++)
+            psl->ThumbEmitStoreRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg]));
+    }
+
+    psl->ThumbEmitGetThread(mode, regThread);
+
+    if (mode == TLSACCESS_GENERIC)
+    {
+        for (int reg = 0; reg < 4; reg++)
+            psl->ThumbEmitLoadRegIndirect(ThumbReg(reg), thumbRegSp, offsetof(ArgumentRegisters, r[reg]));
+    }
+
+    // mov [regFrame + FrameInfo.offsetOfGSCookie], GetProcessGSCookie()
+    psl->ThumbEmitMovConstant(regScratch, GetProcessGSCookie());
+    psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfGSCookie - negSpace);
+
+    // mov [regFrame + FrameInfo.offsetOfFrameVptr], InlinedCallFrame::GetMethodFrameVPtr()
+    psl->ThumbEmitMovConstant(regScratch, InlinedCallFrame::GetMethodFrameVPtr());
+    psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameVptr - negSpace);
+
+    // ldr regScratch, [regThread + offsetof(Thread, m_pFrame)]
+    // str regScratch, [regFrame + FrameInfo.offsetOfFrameLink]
+    psl->ThumbEmitLoadRegIndirect(regScratch, regThread, offsetof(Thread, m_pFrame));
+    psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfFrameLink - negSpace);
+
+    // str FP, [regFrame + FrameInfo.offsetOfCalleeSavedEbp]
+    psl->ThumbEmitStoreRegIndirect(thumbRegFp, regFrame, FrameInfo.offsetOfCalleeSavedFP - negSpace);
+
+    // mov [regFrame + FrameInfo.offsetOfReturnAddress], 0
+    psl->ThumbEmitMovConstant(regScratch, 0);
+    psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfReturnAddress - negSpace);
+
+    if (mode == TLSACCESS_GENERIC)
+    {
+        DWORD cbSavedRegs = sizeof(ArgumentRegisters) + 2 * 4; // r0-r3, r4, lr
+        psl->ThumbEmitAdd(regScratch, thumbRegSp, cbSavedRegs);
+        psl->ThumbEmitStoreRegIndirect(regScratch, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace);
+    }
+    else
+    {
+        // str SP, [regFrame + FrameInfo.offsetOfCallSiteSP]
+        psl->ThumbEmitStoreRegIndirect(thumbRegSp, regFrame, FrameInfo.offsetOfCallSiteSP - negSpace);
+    }
+
+    // mov [regThread + offsetof(Thread, m_pFrame)], regFrame
+    psl->ThumbEmitStoreRegIndirect(regFrame, regThread, offsetof(Thread, m_pFrame));
+
+    // leave current Thread in R4
+
+    if (mode == TLSACCESS_GENERIC)
+    {
+        psl->ThumbEmitEpilog();
+    }
+    else
+    {
+        // Return. The return address has been restored into LR at this point.
+        // bx lr
+        psl->ThumbEmitJumpRegister(thumbRegLr);
+    }
+
+    // A single process-wide stub that will never unload
+    RETURN psl->Link(SystemDomain::GetGlobalLoaderAllocator()->GetStubHeap());
+}
+
+void StubLinkerCPU::ThumbEmitGetThread(TLSACCESSMODE mode, ThumbReg dest)
+{
+#ifndef FEATURE_IMPLICIT_TLS
+    DWORD idxThread = GetThreadTLSIndex();
+
+    if (mode != TLSACCESS_GENERIC)
+    {
+        // mrc p15, 0, dest, c13, c0, 2
+        Emit16(0xee1d);
+        Emit16((WORD)(0x0f50 | (dest << 12)));
+
+        if (mode == TLSACCESS_WNT)
+        {
+            // ldr dest, [dest, #(WINNT_TLS_OFFSET + (idxThread * sizeof(void*)))]
+            ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, TlsSlots) + (idxThread * sizeof(void*)));
+        }
+        else
+        {
+            _ASSERTE(mode == TLSACCESS_WNT_HIGH);
+
+            // ldr dest, [dest, #WINNT5_TLSEXPANSIONPTR_OFFSET]
+            ThumbEmitLoadRegIndirect(dest, dest, offsetof(TEB, TlsExpansionSlots));
+
+            // ldr dest, [dest + #(idxThread * 4)]
+            ThumbEmitLoadRegIndirect(dest, dest, (idxThread - TLS_MINIMUM_AVAILABLE) * sizeof(void*));
+        }
+    }
+    else
+    {
+        ThumbEmitMovConstant(ThumbReg(0), idxThread);
+
+#pragma push_macro("TlsGetValue")
+#undef TlsGetValue
+        ThumbEmitMovConstant(ThumbReg(1), (TADDR)TlsGetValue);
+#pragma pop_macro("TlsGetValue")
+
+        ThumbEmitCallRegister(ThumbReg(1));
+
+        if (dest != ThumbReg(0))
+        {
+            ThumbEmitMovRegReg(dest, ThumbReg(0));
+        }
+    }
+#else
+    ThumbEmitMovConstant(ThumbReg(0), (TADDR)GetThread);
+
+    ThumbEmitCallRegister(ThumbReg(0));
+
+    if (dest != ThumbReg(0))
+    {
+        ThumbEmitMovRegReg(dest, ThumbReg(0));
+    }
+#endif
+}
+#endif // CROSSGEN_COMPILE
+
+
+// Emits code to adjust for a static delegate target.
+VOID StubLinkerCPU::EmitShuffleThunk(ShuffleEntry *pShuffleEntryArray)
+{
+    // Scan the shuffle entries to see if there are any stack-to-stack operations. If there aren't we can emit a
+    // much simpler thunk (simply because we generate code that doesn't require more than one scratch
+    // register).
+    bool fSimpleCase = true;
+    ShuffleEntry *pEntry = pShuffleEntryArray;
+    while (pEntry->srcofs != ShuffleEntry::SENTINEL)
+    {
+        // It's enough to check whether we have a destination stack location (there are no register to stack
+        // scenarios).
+        if (!(pEntry->dstofs & ShuffleEntry::REGMASK))
+        {
+            fSimpleCase = false;
+            break;
+        }
+        pEntry++;
+    }
+
+    if (fSimpleCase)
+    {
+        // No real prolog for the simple case, we're a tail call so we shouldn't be on the stack for any walk
+        // or unwind.
+
+        // On entry r0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
+        // field and stash it in r12.
+        //  ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
+        ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
+
+        // Emit the instructions to rewrite the argument registers. Most will be register-to-register (e.g.
+        // move r1 to r0) but one or two of them might move values from the top of the incoming stack
+        // arguments into registers r2 and r3. Note that the entries are ordered so that we don't need to
+        // worry about a move overwriting a register we'll need to use as input for the next move (i.e. we get
+        // move r1 to r0, move r2 to r1 etc.).
+        pEntry = pShuffleEntryArray;
+        while (pEntry->srcofs != ShuffleEntry::SENTINEL)
+        {
+            _ASSERTE(pEntry->dstofs & ShuffleEntry::REGMASK);
+
+            if (pEntry->srcofs & ShuffleEntry::REGMASK)
+            {
+                // Move from register case.
+                ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
+                                   ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
+            }
+            else
+            {
+                // Move from the stack case.
+                //  ldr <dest>, [sp + #source_offset]
+                ThumbEmitLoadRegIndirect(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
+                                         thumbRegSp,
+                                         (pEntry->srcofs & ShuffleEntry::OFSMASK) * 4);
+            }
+
+            pEntry++;
+        }
+
+        // Tail call to real target.
+        //  bx r12
+        ThumbEmitJumpRegister(ThumbReg(12));
+
+        return;
+    }
+
+    // In the more complex case we need to re-write at least some of the arguments on the stack as well as
+    // argument registers. We need some temporary registers to perform stack-to-stack copies and we've
+    // reserved our one remaining volatile register, r12, to store the eventual target method address. So
+    // we're going to generate a hybrid-tail call. Using a tail call has the advantage that we don't need to
+    // erect and link an explicit CLR frame to enable crawling of this thunk. Additionally re-writing the
+    // stack can be more performant in some scenarios than copying the stack (in the presence of floating point
+    // or arguments requiring 64-bit alignment we might not have to move some or even most of the values).
+    // The hybrid nature is that we'll erect a standard native frame (with a proper prolog and epilog) so we
+    // can save some non-volatile registers to act as temporaries. Once we've performed the stack re-write
+    // we'll poke the saved LR value (which will become a PC value on the pop in the epilog) to return to the
+    // target method instead of us, thus atomically removing our frame from the stack and tail-calling the
+    // real target.
+
+    // Prolog:
+    ThumbEmitProlog(3,      // Save r4-r6,lr (count doesn't include lr)
+                    0,      // No additional space in the stack frame required
+                    FALSE); // Don't push argument registers
+
+    // On entry r0 holds the delegate instance. Look up the real target address stored in the MethodPtrAux
+    // field and stash it in r12.
+    //  ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]
+    ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
+
+    // As we copy slots from lower in the argument stack to higher we need to keep track of source and
+    // destination pointers into those arguments (if we just use offsets from SP we get into trouble with
+    // argument frames larger than 4K). We'll use r4 to track the source (original location of an argument
+    // from the caller's perspective) and r5 to track the destination (new location of the argument from the
+    // callee's perspective). Both start at the current value of SP plus the offset created by pushing our
+    // stack frame in the prolog.
+    //  add r4, sp, #cbSavedRegs
+    //  add r5, sp, #cbSavedRegs
+    DWORD cbSavedRegs = 4 * 4; // r4, r5, r6, lr
+    ThumbEmitAdd(ThumbReg(4), thumbRegSp, cbSavedRegs);
+    ThumbEmitAdd(ThumbReg(5), thumbRegSp, cbSavedRegs);
+
+    // Follow the shuffle array instructions to re-write some subset of r0-r3 and the stacked arguments to
+    // remove the unwanted delegate instance in r0. Arguments only ever move from higher registers to lower
+    // registers or higher stack addresses to lower stack addresses and are ordered from lowest register to
+    // highest stack address. As a result we can do all updates in order and in place and we'll never
+    // overwrite a register or stack location needed as a source value in a later iteration.
+    DWORD dwLastSrcIndex = (DWORD)-1;
+    DWORD dwLastDstIndex = (DWORD)-1;
+    pEntry = pShuffleEntryArray;
+    while (pEntry->srcofs != ShuffleEntry::SENTINEL)
+    {
+        // If this is a register-to-register move we can do it in one instruction.
+        if ((pEntry->srcofs & ShuffleEntry::REGMASK) && (pEntry->dstofs & ShuffleEntry::REGMASK))
+        {
+            ThumbEmitMovRegReg(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK),
+                               ThumbReg(pEntry->srcofs & ShuffleEntry::OFSMASK));
+        }
+        else
+        {
+            // There is no case where a source argument register is moved into a destination stack slot.
+            _ASSERTE((pEntry->srcofs & ShuffleEntry::REGMASK) == 0);
+
+            // Source or destination stack offsets might not be contiguous (though they often will be).
+            // Floating point arguments and 64-bit aligned values can cause discontinuities. While we copy
+            // values we'll use post increment addressing modes to move both source and destination stack
+            // pointers forward 4 bytes at a time, the common case. But we'll insert additional add
+            // instructions for any holes we find (we detect these by remembering the last source and
+            // destination stack offset we used).
+
+            // Add any additional offset to the source pointer (r4) to account for holes in the copy.
+            DWORD dwSrcIndex = pEntry->srcofs & ShuffleEntry::OFSMASK;
+            if (dwSrcIndex != (dwLastSrcIndex + 1))
+            {
+                _ASSERTE(dwSrcIndex > dwLastSrcIndex);
+
+                //  add r4, #gap_size
+                ThumbEmitIncrement(ThumbReg(4), (dwSrcIndex - dwLastSrcIndex - 1) * 4);
+            }
+            dwLastSrcIndex = dwSrcIndex;
+
+            // Load the source value from the stack and increment our source pointer (r4) in one instruction.
+            // If the target is a register we can move the value directly there. Otherwise we move it to the
+            // r6 temporary register.
+            if (pEntry->dstofs & ShuffleEntry::REGMASK)
+            {
+                //  ldr <regnum>, [r4], #4
+                ThumbEmitLoadIndirectPostIncrement(ThumbReg(pEntry->dstofs & ShuffleEntry::OFSMASK), ThumbReg(4), 4);
+            }
+            else
+            {
+                //  ldr r6, [r4], #4
+                ThumbEmitLoadIndirectPostIncrement(ThumbReg(6), ThumbReg(4), 4);
+
+                // Add any additional offset to the destination pointer (r5) to account for holes in the copy.
+                DWORD dwDstIndex = pEntry->dstofs & ShuffleEntry::OFSMASK;
+                if (dwDstIndex != (dwLastDstIndex + 1))
+                {
+                    _ASSERTE(dwDstIndex > dwLastDstIndex);
+
+                    //  add r5, #gap_size
+                    ThumbEmitIncrement(ThumbReg(5), (dwDstIndex - dwLastDstIndex - 1) * 4);
+                }
+                dwLastDstIndex = dwDstIndex;
+
+                // Write the value in r6 to its final home on the stack and increment our destination pointer
+                // (r5).
+                //  str r6, [r5], #4
+                ThumbEmitStoreIndirectPostIncrement(ThumbReg(6), ThumbReg(5), 4);
+            }
+        }
+
+        pEntry++;
+    }
+
+    // Arguments are copied. Now we modify the saved value of LR we created in our prolog (which will be
+    // popped back off into PC in our epilog) so that it points to the real target address in r12 rather than
+    // our return address. We haven't modified LR ourselves, so the net result is that executing our epilog
+    // will pop our frame and tail call to the real method.
+    //  str r12, [sp + #(cbSavedRegs-4)]
+    ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, cbSavedRegs - 4);
+
+    // Epilog:
+    ThumbEmitEpilog();
+}
+
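(Editor's aside: for a concrete picture, a hypothetical shuffle array for a static delegate whose target takes two ints, and the simple-case thunk EmitShuffleThunk would emit for it; real arrays come from the delegate plumbing, and the exact ShuffleEntry initializer shape is an assumption:)

    ShuffleEntry rgShuffleSketch[] =
    {
        { ShuffleEntry::REGMASK | 1, ShuffleEntry::REGMASK | 0 },  // r1 -> r0 (drop delegate 'this')
        { ShuffleEntry::REGMASK | 2, ShuffleEntry::REGMASK | 1 },  // r2 -> r1
        { ShuffleEntry::SENTINEL }
    };

    // Generated thunk (simple case, no stack moves):
    //   ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)]  ; fetch real target
    //   mov r0, r1
    //   mov r1, r2
    //   bx  r12                                                  ; tail call the target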
+    if (pMD->HasStableEntryPoint())
+    {
+        // mov r12, #entry_point
+        ThumbEmitMovConstant(ThumbReg(12), (TADDR)pMD->GetStableEntryPoint());
+    }
+    else
+    {
+        // mov r12, #slotaddress
+        ThumbEmitMovConstant(ThumbReg(12), (TADDR)pMD->GetAddrOfSlot());
+
+        // ldr r12, [r12]
+        ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(12), 0);
+    }
+
+    if (fTailcall)
+    {
+        // bx r12
+        ThumbEmitJumpRegister(ThumbReg(12));
+    }
+    else
+    {
+        // blx r12
+        ThumbEmitCallRegister(ThumbReg(12));
+    }
+}
+
+#ifndef CROSSGEN_COMPILE
+// Common code used to generate either an instantiating method stub or an unboxing stub (in the case where the
+// unboxing stub also needs to provide a generic instantiation parameter). The stub needs to add the
+// instantiation parameter provided in pHiddenArg and re-arrange the rest of the incoming arguments as a
+// result (since on ARM this hidden parameter is inserted before explicit user arguments, we need a type of
+// shuffle thunk in the reverse direction of the type used for static delegates). If pHiddenArg == NULL it
+// indicates that we're in the unboxing case and should add sizeof(MethodTable*) to the incoming this pointer
+// before dispatching to the target. In this case the instantiating parameter is always the non-shared
+// MethodTable pointer we can deduce directly from the incoming 'this' reference.
+void StubLinkerCPU::ThumbEmitCallWithGenericInstantiationParameter(MethodDesc *pMD, void *pHiddenArg)
+{
+    // There is a simple case and a complex case.
+    //  1) In the simple case the addition of the hidden arg doesn't push any user args onto the stack. In
+    //     this case we only have to re-arrange/initialize some argument registers and tail call to the
+    //     target.
+    //  2) In the complex case we have to modify the stack by pushing some of the register based user
+    //     arguments. We can't tail call in this case because we've altered the size of the stack and our
+    //     caller doesn't expect this and can't compensate. Instead we'll need to create a stack frame
+    //     (including an explicit Frame to make it crawlable to the runtime) and copy the incoming arguments
+    //     over.
+    //
+    // First we need to analyze the signature of the target method both with and without the extra
+    // instantiation argument. We use ArgIterator to determine the difference in location
+    // (register or stack offset) for each argument between the two cases. This forms a set of instructions that
+    // tell us how to copy incoming arguments into outgoing arguments (and if those instructions don't include
+    // any writes to stack locations in the outgoing case then we know we can generate a simple thunk).
+
+    SigTypeContext sTypeContext(pMD, TypeHandle());
+
+    // Incoming, source, method signature.
+    MetaSig sSrcSig(pMD->GetSignature(),
+                    pMD->GetModule(),
+                    &sTypeContext,
+                    MetaSig::sigMember);
+
+    // Outgoing, destination, method signature.
+    MetaSig sDstSig(pMD->GetSignature(),
+                    pMD->GetModule(),
+                    &sTypeContext,
+                    MetaSig::sigMember);
+
+    sDstSig.SetHasParamTypeArg();
+
+    // Wrap calling convention parsers around the source and destination signatures. These will be responsible
+    // for determining where each argument lives in registers or on the stack.
+    ArgIterator sSrcArgLocations(&sSrcSig);
+    ArgIterator sDstArgLocations(&sDstSig);
+
+    // Define an argument descriptor type that describes how a single 4-byte portion of an argument is mapped
We only have to worry about general registers and stack + // locations here; floating point argument registers are left unmodified by this thunk. + struct ArgDesc + { + int m_idxSrc; // Source register or stack offset + int m_idxDst; // Destination register or stack offset + bool m_fSrcIsReg; // Source index is a register number + bool m_fDstIsReg; // Destination index is a register number + }; + + // The number of argument move descriptors we'll need is a function of the number of 4-byte registers or + // stack slots the arguments occupy. The following calculation will over-estimate in a few side cases, but + // not by much (it assumes all four argument registers are used plus the number of stack slots that + // MetaSig calculates are needed for the rest of the arguments). + DWORD cArgDescriptors = 4 + (sSrcArgLocations.SizeOfArgStack() / 4); + + // Allocate the array of argument descriptors. + CQuickArray<ArgDesc> rgArgDescs; + rgArgDescs.AllocThrows(cArgDescriptors); + + // We only need to map translations for arguments that could come after the instantiation parameter we're + // inserting. On the ARM the only implicit argument that could follow is a vararg signature cookie, but + // it's disallowed in this case. So we simply walk the user arguments. + _ASSERTE(!sSrcSig.IsVarArg()); + + INT srcOffset; + INT dstOffset; + + DWORD idxCurrentDesc = 0; + while ((srcOffset = sSrcArgLocations.GetNextOffset()) != TransitionBlock::InvalidOffset) + { + dstOffset = sDstArgLocations.GetNextOffset(); + + // Get the placement for a single argument in the source and destination signatures (may include + // multiple registers and/or stack locations if the argument is larger than 4 bytes). + ArgLocDesc sSrcArgLoc; + sSrcArgLocations.GetArgLoc(srcOffset, &sSrcArgLoc); + ArgLocDesc sDstArgLoc; + sDstArgLocations.GetArgLoc(dstOffset, &sDstArgLoc); + + // Fill in as many single-slot descriptors as the argument needs. Note that we ignore any floating + // point register cases (m_cFloatReg > 0) since these will never change due to the hidden arg + // insertion. + while (sSrcArgLoc.m_cGenReg || sSrcArgLoc.m_cStack) + { + _ASSERTE(idxCurrentDesc < cArgDescriptors); + + if (sSrcArgLoc.m_cGenReg) + { + sSrcArgLoc.m_cGenReg--; + rgArgDescs[idxCurrentDesc].m_idxSrc = sSrcArgLoc.m_idxGenReg++; + rgArgDescs[idxCurrentDesc].m_fSrcIsReg = true; + } + else + { + _ASSERTE(sSrcArgLoc.m_cStack > 0); + sSrcArgLoc.m_cStack--; + rgArgDescs[idxCurrentDesc].m_idxSrc = sSrcArgLoc.m_idxStack++; + rgArgDescs[idxCurrentDesc].m_fSrcIsReg = false; + } + + if (sDstArgLoc.m_cGenReg) + { + sDstArgLoc.m_cGenReg--; + rgArgDescs[idxCurrentDesc].m_idxDst = sDstArgLoc.m_idxGenReg++; + rgArgDescs[idxCurrentDesc].m_fDstIsReg = true; + } + else + { + _ASSERTE(sDstArgLoc.m_cStack > 0); + sDstArgLoc.m_cStack--; + rgArgDescs[idxCurrentDesc].m_idxDst = sDstArgLoc.m_idxStack++; + rgArgDescs[idxCurrentDesc].m_fDstIsReg = false; + } + + idxCurrentDesc++; + } + } + + // Update descriptor count to the actual number used. + cArgDescriptors = idxCurrentDesc; + + // Note the position at which we have the first move to a stack location + DWORD idxFirstMoveToStack = -1; + + // We have a problem where register to register moves are concerned. Since we're adding an argument the + // moves will be from a lower numbered register to a higher numbered one (e.g. r0 -> r1). But the argument + // descriptors we just produced will order them starting from the lowest registers. 
If we emit move + // instructions in this order we'll end up copying the value of the lowest register into all of the rest + // (e.g. r0 -> r1, r1 -> r2 etc.). We don't have this problem with stack based arguments since the + // argument stacks don't overlap in the same fashion. To solve this we'll reverse the order of the + // descriptors with register destinations (there will be at most four of these so it's fairly cheap). + if (cArgDescriptors > 1) + { + // Start by assuming we have all four register destination descriptors. + DWORD idxLastRegDesc = min(3, cArgDescriptors - 1); + + // Adjust that count to match reality. + while (!rgArgDescs[idxLastRegDesc].m_fDstIsReg) + { + _ASSERTE(idxLastRegDesc > 0); + idxLastRegDesc--; + } + + // First move to stack location happens after the last move to register location + idxFirstMoveToStack = idxLastRegDesc+1; + + // Calculate how many descriptors we'll need to swap. + DWORD cSwaps = (idxLastRegDesc + 1) / 2; + + // Finally we can swap the descriptors. + DWORD idxFirstRegDesc = 0; + while (cSwaps) + { + ArgDesc sTempDesc = rgArgDescs[idxLastRegDesc]; + rgArgDescs[idxLastRegDesc] = rgArgDescs[idxFirstRegDesc]; + rgArgDescs[idxFirstRegDesc] = sTempDesc; + + _ASSERTE(idxFirstRegDesc < idxLastRegDesc); + idxFirstRegDesc++; + idxLastRegDesc--; + cSwaps--; + } + } + + // If we're ever required to write to the destination stack then we can't implement this case with a + // simple tail call stub. (That's not technically true: there are edge cases caused by 64-bit alignment + // requirements that might allow us to use a simple stub since the extra argument fits in a "hole" in the + // arguments, but these are infrequent enough that it's likely not worth the effort of detecting them). + ArgDesc *pLastArg = cArgDescriptors ? &rgArgDescs[cArgDescriptors - 1] : NULL; + if ((pLastArg == NULL) || pLastArg->m_fDstIsReg) + { + // Simple case where we can just rearrange a few argument registers and tail call. + + for (idxCurrentDesc = 0; idxCurrentDesc < cArgDescriptors; idxCurrentDesc++) + { + // Because we're in the simple case we know we'll never be asked to move a value onto the stack + // and since we're adding a parameter we should never be required to move a value from the stack + // to a register either. So all of the descriptors should be register to register moves. + _ASSERTE(rgArgDescs[idxCurrentDesc].m_fSrcIsReg && rgArgDescs[idxCurrentDesc].m_fDstIsReg); + ThumbEmitMovRegReg(ThumbReg(rgArgDescs[idxCurrentDesc].m_idxDst), + ThumbReg(rgArgDescs[idxCurrentDesc].m_idxSrc)); + } + + // Place instantiation parameter into the correct register. + ArgLocDesc sInstArgLoc; + sDstArgLocations.GetParamTypeLoc(&sInstArgLoc); + int regHidden = sInstArgLoc.m_idxGenReg; + _ASSERTE(regHidden != -1); + if (pHiddenArg) + { + // mov regHidden, #pHiddenArg + ThumbEmitMovConstant(ThumbReg(regHidden), (TADDR)pHiddenArg); + } + else + { + // Extract MethodTable pointer (the hidden arg) from the object instance. + // ldr regHidden, [r0] + ThumbEmitLoadRegIndirect(ThumbReg(regHidden), ThumbReg(0), 0); + } + + if (pHiddenArg == NULL) + { + // Unboxing stub case. + + // Skip over the MethodTable* to find the address of the unboxed value type. + // add r0, #sizeof(MethodTable*) + ThumbEmitIncrement(ThumbReg(0), sizeof(MethodTable*)); + } + + // Emit a tail call to the target method. + ThumbEmitCallManagedMethod(pMD, true); + } + else + { + // Complex case where we need to emit a new stack frame and copy the arguments. 
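+        // (A typical trigger, by way of a hypothetical example: the argument that
+        // used to arrive in r3 now has to move to the first outgoing stack slot,
+        // so the last descriptor targets the stack and we take this path.)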
+
+        // Calculate the size of the new stack frame:
+        //
+        //            +------------+
+        //      SP -> |            | <-+
+        //            :            :   | Outgoing arguments
+        //            |            | <-+
+        //            +------------+
+        //            | Padding    | <-- Optional, maybe required so that SP is 64-bit aligned
+        //            +------------+
+        //            | GS Cookie  |
+        //            +------------+
+        //        +-> | vtable ptr |
+        //        |   +------------+
+        //        |   |  m_Next    |
+        //        |   +------------+
+        //        |   |    R4      | <-+
+        //   Stub |   +------------+   |
+        // Helper |   :            :   |
+        //  Frame |   +------------+   | Callee saved registers
+        //        |   |    R11     |   |
+        //        |   +------------+   |
+        //        |   | LR/RetAddr | <-+
+        //        |   +------------+
+        //        |   |    R0      | <-+
+        //        |   +------------+   |
+        //        |   :            :   | Argument registers
+        //        |   +------------+   |
+        //        +-> |    R3      | <-+
+        //            +------------+
+        //  Old SP -> |            |
+        //
+        DWORD cbStackArgs = (pLastArg->m_idxDst + 1) * 4;
+        DWORD cbStackFrame = cbStackArgs + sizeof(GSCookie) + sizeof(StubHelperFrame);
+        cbStackFrame = ALIGN_UP(cbStackFrame, 8);
+        DWORD cbStackFrameWithoutSavedRegs = cbStackFrame - (13 * 4); // r0-r11,lr
+
+        // Prolog:
+        ThumbEmitProlog(8,                            // Save r4-r11,lr (count doesn't include lr)
+                        cbStackFrameWithoutSavedRegs, // Additional space in the stack frame required
+                        TRUE);                        // Push argument registers
+
+        DWORD offsetOfFrame = cbStackFrame - sizeof(StubHelperFrame);
+
+        // Initialize and link the StubHelperFrame and associated GS cookie.
+        EmitStubLinkFrame(StubHelperFrame::GetMethodFrameVPtr(), offsetOfFrame, StubHelperFrame::GetOffsetOfTransitionBlock());
+
+        // Initialize temporary registers used when copying arguments:
+        //  r6 == pointer to first incoming stack-based argument
+        //  r7 == pointer to first outgoing stack-based argument
+
+        // add r6, sp, #cbStackFrame
+        ThumbEmitAdd(ThumbReg(6), thumbRegSp, cbStackFrame);
+
+        // mov r7, sp
+        ThumbEmitMovRegReg(ThumbReg(7), thumbRegSp);
+
+        // Copy incoming to outgoing arguments. Stack arguments are generally written consecutively and as
+        // such we use post-increment forms of register indirect addressing to keep our input (r6) and output
+        // (r7) pointers up to date. But sometimes we'll skip four bytes due to 64-bit alignment requirements
+        // and need to bump one or both of the pointers to compensate; we detect such holes by remembering the
+        // last source and destination stack index we copied.
+        //
+        // At this point, the ArgumentDescriptor array is divided into two parts:
+        //
+        //  1) Reverse sorted register to register moves (see the comment earlier in the method for details)
+        //  2) Register or Stack to Stack moves (if any) in the original order.
+        //
+        // It's possible that the register to register moves may move to a target register that happens
+        // to be a source for the register -> stack move. If this happens, and we emit the argument moves
+        // in the current order, then we can lose the contents of the register involved in the register->stack
+        // move (stack->stack moves are not a problem as the locations don't overlap).
+        //
+        // To address this, we will emit the argument moves in two loops:
+        //
+        //  1) The first loop will emit the moves that have a stack location as the target
+        //  2) The second loop will emit the moves that have a register as the target.
+        DWORD idxCurrentLoopBegin = 0, idxCurrentLoopEnd = cArgDescriptors;
+        if (idxFirstMoveToStack != -1)
+        {
+            _ASSERTE(idxFirstMoveToStack < cArgDescriptors);
+            idxCurrentLoopBegin = idxFirstMoveToStack;
+
+            for (idxCurrentDesc = idxCurrentLoopBegin; idxCurrentDesc < idxCurrentLoopEnd; idxCurrentDesc++)
+            {
+                ArgDesc *pArgDesc = &rgArgDescs[idxCurrentDesc];
+
+                if (pArgDesc->m_fSrcIsReg)
+                {
+                    // Source value is in a register.
+
+                    _ASSERTE(!pArgDesc->m_fDstIsReg);
+                    // Register to stack.
Calculate delta from last stack write; normally it will be 4 bytes + // and our pointer has already been set up correctly by the post increment of the last + // write. But in some cases we need to skip four bytes due to a 64-bit alignment + // requirement. In those cases we need to emit an extra add to keep the pointer correct. + // Note that the first stack argument is guaranteed to be 64-bit aligned by the ABI and as + // such the first stack slot is never skipped. + if ((pArgDesc->m_idxDst > 0) && + (pArgDesc->m_idxDst != (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 1))) + { + _ASSERTE(pArgDesc->m_idxDst == (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 2)); + ThumbEmitIncrement(ThumbReg(7), 4); + } + + // str srcReg, [r7], #4 + ThumbEmitStoreIndirectPostIncrement(pArgDesc->m_idxSrc, ThumbReg(7), 4); + } + else + { + // Source value is on the stack. We should have no cases where a stack argument moves back to + // a register (because we're adding an argument). + _ASSERTE(!pArgDesc->m_fDstIsReg); + + // Stack to stack move. We need to use register (r6) to store the value temporarily between + // the read and the write. See the comments above for why we need to check stack deltas and + // possibly insert extra add instructions in some cases. + if ((pArgDesc->m_idxSrc > 0) && + (pArgDesc->m_idxSrc != (rgArgDescs[idxCurrentDesc - 1].m_idxSrc + 1))) + { + _ASSERTE(pArgDesc->m_idxSrc == (rgArgDescs[idxCurrentDesc - 1].m_idxSrc + 2)); + ThumbEmitIncrement(ThumbReg(6), 4); + } + if ((pArgDesc->m_idxDst > 0) && + (pArgDesc->m_idxDst != (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 1))) + { + _ASSERTE(pArgDesc->m_idxDst == (rgArgDescs[idxCurrentDesc - 1].m_idxDst + 2)); + ThumbEmitIncrement(ThumbReg(7), 4); + } + + // ldr r8, [r6], #4 + ThumbEmitLoadIndirectPostIncrement(ThumbReg(8), ThumbReg(6), 4); + + // str r8, [r7], #4 + ThumbEmitStoreIndirectPostIncrement(ThumbReg(8), ThumbReg(7), 4); + } + } + + // Update the indexes to be used for the second loop + idxCurrentLoopEnd = idxCurrentLoopBegin; + idxCurrentLoopBegin = 0; + } + + // Now, perform the register to register moves + for (idxCurrentDesc = idxCurrentLoopBegin; idxCurrentDesc < idxCurrentLoopEnd; idxCurrentDesc++) + { + ArgDesc *pArgDesc = &rgArgDescs[idxCurrentDesc]; + + // All moves to stack locations have been done (if applicable). + // Since we are moving to a register destination, the source + // will also be a register and cannot be a stack location (refer to the previous loop). + _ASSERTE(pArgDesc->m_fSrcIsReg && pArgDesc->m_fDstIsReg); + + // Register to register case. + ThumbEmitMovRegReg(pArgDesc->m_idxDst, pArgDesc->m_idxSrc); + } + + + // Place instantiation parameter into the correct register. + ArgLocDesc sInstArgLoc; + sDstArgLocations.GetParamTypeLoc(&sInstArgLoc); + int regHidden = sInstArgLoc.m_idxGenReg; + _ASSERTE(regHidden != -1); + if (pHiddenArg) + { + // mov regHidden, #pHiddenArg + ThumbEmitMovConstant(ThumbReg(regHidden), (TADDR)pHiddenArg); + } + else + { + // Extract MethodTable pointer (the hidden arg) from the object instance. + // ldr regHidden, [r0] + ThumbEmitLoadRegIndirect(ThumbReg(regHidden), ThumbReg(0), 0); + } + + if (pHiddenArg == NULL) + { + // Unboxing stub case. + + // Skip over the MethodTable* to find the address of the unboxed value type. + // add r0, #sizeof(MethodTable*) + ThumbEmitIncrement(ThumbReg(0), sizeof(MethodTable*)); + } + + // Emit a regular (non-tail) call to the target method. + ThumbEmitCallManagedMethod(pMD, false); + + // Unlink the StubHelperFrame. 
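+        // (EmitStubUnlinkFrame expects r4 to address the Frame and r5 to hold the
+        // current Thread*; both were established by EmitStubLinkFrame above and,
+        // being callee-saved, survived the call we just made.)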
+        EmitStubUnlinkFrame();
+
+        // Epilog
+        ThumbEmitEpilog();
+    }
+}
+
+#if defined(FEATURE_SHARE_GENERIC_CODE)
+// The stub generated by this method passes an extra dictionary argument before jumping to
+// shared-instantiation generic code.
+//
+// pSharedMD is either
+//    * An InstantiatedMethodDesc for a generic method whose code is shared across instantiations.
+//      In this case, the extra argument is the InstantiatedMethodDesc for the instantiation-specific stub itself.
+// or * A MethodDesc for a static method in a generic class whose code is shared across instantiations.
+//      In this case, the extra argument is the MethodTable pointer of the instantiated type.
+VOID StubLinkerCPU::EmitInstantiatingMethodStub(MethodDesc* pSharedMD, void* extra)
+{
+    CONTRACTL
+    {
+        THROWS;
+        GC_TRIGGERS;
+        INJECT_FAULT(COMPlusThrowOM(););
+        PRECONDITION(pSharedMD->RequiresInstMethodTableArg() || pSharedMD->RequiresInstMethodDescArg());
+    }
+    CONTRACTL_END;
+
+    // Share code with the instantiating version of the unboxing stub (see below).
+    ThumbEmitCallWithGenericInstantiationParameter(pSharedMD, extra);
+}
+#endif // FEATURE_SHARE_GENERIC_CODE
+
+void StubLinkerCPU::EmitUnboxMethodStub(MethodDesc *pMD)
+{
+    if (pMD->RequiresInstMethodTableArg())
+    {
+        // In this case we also have to add an instantiating parameter (which is always the MethodTable* from
+        // the instance we're called on). Most of this code is shared with the instantiating method stub
+        // above; the NULL parameter informs the emitter that we're both an unboxing stub and that the extra
+        // parameter can be deduced from the 'this' reference.
+        ThumbEmitCallWithGenericInstantiationParameter(pMD, NULL);
+    }
+    else
+    {
+        // We assume that we'll never see a case where a boxed value type method will require an instantiated
+        // method desc as a parameter. The stubs on other platforms make this assumption (and indeed this
+        // method isn't even passed an additional instantiation parameter). This is trivially true for the
+        // non-interface call case: the only methods callable directly on the boxed instance are the methods
+        // of Object, none of which are generic. For the interface dispatch case we're relying on the fact
+        // that the jit always provides the instantiating argument explicitly.
+        _ASSERTE(!pMD->RequiresInstMethodDescArg());
+
+        // The address of the value type is the address of the boxed instance plus four.
+        //  add r0, #4
+        ThumbEmitIncrement(ThumbReg(0), 4);
+
+        // Tail call the real target.
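+        // Illustrative shape of the finished unboxing stub, assuming the target
+        // has a stable entry point so the direct-call path below is taken:
+        //   add  r0, #4              ; step over the MethodTable*
+        //   movw/movt r12, #entry
+        //   bx   r12                 ; tail call, no frame erected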
+        ThumbEmitCallManagedMethod(pMD, true /* tail call */);
+    }
+}
+
+#endif // CROSSGEN_COMPILE
+
+#endif // !DACCESS_COMPILE
+
+LONG CLRNoCatchHandler(EXCEPTION_POINTERS* pExceptionInfo, PVOID pv)
+{
+    return EXCEPTION_CONTINUE_SEARCH;
+}
+
+void UpdateRegDisplayFromCalleeSavedRegisters(REGDISPLAY * pRD, CalleeSavedRegisters * pRegs)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    T_CONTEXT * pContext = pRD->pCurrentContext;
+    pContext->R4 = pRegs->r4;
+    pContext->R5 = pRegs->r5;
+    pContext->R6 = pRegs->r6;
+    pContext->R7 = pRegs->r7;
+    pContext->R8 = pRegs->r8;
+    pContext->R9 = pRegs->r9;
+    pContext->R10 = pRegs->r10;
+    pContext->R11 = pRegs->r11;
+    pContext->Lr = pRegs->r14;
+
+    T_KNONVOLATILE_CONTEXT_POINTERS * pContextPointers = pRD->pCurrentContextPointers;
+    pRD->pCurrentContextPointers->R4 = (PDWORD)&pRegs->r4;
+    pRD->pCurrentContextPointers->R5 = (PDWORD)&pRegs->r5;
+    pRD->pCurrentContextPointers->R6 = (PDWORD)&pRegs->r6;
+    pRD->pCurrentContextPointers->R7 = (PDWORD)&pRegs->r7;
+    pRD->pCurrentContextPointers->R8 = (PDWORD)&pRegs->r8;
+    pRD->pCurrentContextPointers->R9 = (PDWORD)&pRegs->r9;
+    pRD->pCurrentContextPointers->R10 = (PDWORD)&pRegs->r10;
+    pRD->pCurrentContextPointers->R11 = (PDWORD)&pRegs->r11;
+    pRD->pCurrentContextPointers->Lr = NULL;
+}
+
+#ifndef CROSSGEN_COMPILE
+void TransitionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+    pRD->IsCallerContextValid = FALSE;
+    pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+    // Copy the saved argument registers into the current context
+    ArgumentRegisters * pArgRegs = GetArgumentRegisters();
+    pRD->pCurrentContext->R0 = pArgRegs->r[0];
+    pRD->pCurrentContext->R1 = pArgRegs->r[1];
+    pRD->pCurrentContext->R2 = pArgRegs->r[2];
+    pRD->pCurrentContext->R3 = pArgRegs->r[3];
+
+    // Next, copy all the callee saved registers
+    UpdateRegDisplayFromCalleeSavedRegisters(pRD, GetCalleeSavedRegisters());
+
+    // Set ControlPC to be the same as the saved "return address"
+    // value, which is actually a ControlPC in the frameless method (e.g.
+    // the faulting address in case of AV or TAE).
+    pRD->pCurrentContext->Pc = GetReturnAddress();
+
+    // Set the caller SP
+    pRD->pCurrentContext->Sp = this->GetSP();
+
+    // Finally, sync up the regdisplay with the context
+    SyncRegDisplayToCurrentContext(pRD);
+
+    LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK  TransitionFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+void TailCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+    pRD->IsCallerContextValid = FALSE;
+    pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+    // Next, copy all the callee saved registers
+    UpdateRegDisplayFromCalleeSavedRegisters(pRD, &m_calleeSavedRegisters);
+
+    // Set ControlPC to be the same as the saved "return address"
+    // value, which is actually a ControlPC in the frameless method (e.g.
+    // the faulting address in case of AV or TAE).
+    pRD->pCurrentContext->Pc = m_ReturnAddress;
+
+    // Set the caller SP
+    pRD->pCurrentContext->Sp = dac_cast<TADDR>(this) + sizeof(*this);
+
+    // Finally, sync up the regdisplay with the context
+    SyncRegDisplayToCurrentContext(pRD);
+
+    LOG((LF_GCROOTS, LL_INFO100000, "STACKWALK  TailCallFrame::UpdateRegDisplay(rip:%p, rsp:%p)\n", pRD->ControlPC, pRD->SP));
+}
+
+#ifndef DACCESS_COMPILE
+
+void TailCallFrame::InitFromContext(T_CONTEXT * pContext)
+{
+    WRAPPER_NO_CONTRACT;
+
+    r4 = pContext->R4;
+    r5 = pContext->R5;
+    r6 = pContext->R6;
+    r7 = pContext->R7;
+    r8 = pContext->R8;
+    r9 = pContext->R9;
+    r10 = pContext->R10;
+    r11 = pContext->R11;
+    m_ReturnAddress = pContext->Lr;
+}
+
+#endif // !DACCESS_COMPILE
+#endif // !CROSSGEN_COMPILE
+
+void FaultingExceptionFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    // Copy the context to regdisplay
+    memcpy(pRD->pCurrentContext, &m_ctx, sizeof(T_CONTEXT));
+
+    pRD->ControlPC = ::GetIP(&m_ctx);
+    pRD->SP = ::GetSP(&m_ctx);
+
+    // Update the integer registers in KNONVOLATILE_CONTEXT_POINTERS from
+    // the exception context we have.
+    pRD->pCurrentContextPointers->R4 = (PDWORD)&m_ctx.R4;
+    pRD->pCurrentContextPointers->R5 = (PDWORD)&m_ctx.R5;
+    pRD->pCurrentContextPointers->R6 = (PDWORD)&m_ctx.R6;
+    pRD->pCurrentContextPointers->R7 = (PDWORD)&m_ctx.R7;
+    pRD->pCurrentContextPointers->R8 = (PDWORD)&m_ctx.R8;
+    pRD->pCurrentContextPointers->R9 = (PDWORD)&m_ctx.R9;
+    pRD->pCurrentContextPointers->R10 = (PDWORD)&m_ctx.R10;
+    pRD->pCurrentContextPointers->R11 = (PDWORD)&m_ctx.R11;
+    pRD->pCurrentContextPointers->Lr = NULL;
+
+    pRD->IsCallerContextValid = FALSE;
+    pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+}
+
+void InlinedCallFrame::UpdateRegDisplay(const PREGDISPLAY pRD)
+{
+    CONTRACT_VOID
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        // We should skip over InlinedCallFrame if it is not active.
+        // It will be part of a JITed method's frame, and the stack-walker
+        // can handle such a case.
+#ifdef PROFILING_SUPPORTED
+        PRECONDITION(CORProfilerStackSnapshotEnabled() || InlinedCallFrame::FrameHasActiveCall(this));
+#endif
+        HOST_NOCALLS;
+        MODE_ANY;
+        SUPPORTS_DAC;
+    }
+    CONTRACT_END;
+
+    // @TODO: Remove this after the debugger is fixed to avoid stack-walks from bad places
+    // @TODO: This may still be needed for sampling profilers
+    if (!InlinedCallFrame::FrameHasActiveCall(this))
+    {
+        LOG((LF_CORDB, LL_ERROR, "WARNING: InlinedCallFrame::UpdateRegDisplay called on inactive frame %p\n", this));
+        return;
+    }
+
+    // Reset pContext; it's only valid for the active (top-most) frame.
+    pRD->pContext = NULL;
+
+    *(pRD->pPC) = m_pCallerReturnAddress;
+    pRD->SP = (DWORD) dac_cast<TADDR>(m_pCallSiteSP);
+
+    pRD->IsCallerContextValid = FALSE;
+    pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary.
+
+    pRD->pCurrentContext->Pc = *(pRD->pPC);
+    pRD->pCurrentContext->Sp = pRD->SP;
+
+    // Update the frame pointer in the current context.
+    pRD->pCurrentContext->R11 = m_pCalleeSavedFP;
+    pRD->pCurrentContextPointers->R11 = &m_pCalleeSavedFP;
+
+    // This is necessary to unwind methods with alloca. This needs to stay
+    // in sync with the definition of REG_SAVED_LOCALLOC_SP in the JIT.
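+    // (REG_SAVED_LOCALLOC_SP is r9 on ARM, which is why the R9 fields below are
+    // redirected at the call-site SP.)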
+ pRD->pCurrentContext->R9 = (DWORD) dac_cast<TADDR>(m_pCallSiteSP); + pRD->pCurrentContextPointers->R9 = (DWORD *)&m_pCallSiteSP; + + RETURN; +} + +#ifdef FEATURE_HIJACK +TADDR ResumableFrame::GetReturnAddressPtr(void) +{ + LIMITED_METHOD_DAC_CONTRACT; + return dac_cast<TADDR>(m_Regs) + offsetof(T_CONTEXT, Pc); +} + +void ResumableFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACT_VOID + { + NOTHROW; + GC_NOTRIGGER; + MODE_ANY; + SUPPORTS_DAC; + } + CONTRACT_END; + + CopyMemory(pRD->pCurrentContext, m_Regs, sizeof(T_CONTEXT)); + + pRD->ControlPC = m_Regs->Pc; + pRD->SP = m_Regs->Sp; + + pRD->pCurrentContextPointers->R4 = &m_Regs->R4; + pRD->pCurrentContextPointers->R5 = &m_Regs->R5; + pRD->pCurrentContextPointers->R6 = &m_Regs->R6; + pRD->pCurrentContextPointers->R7 = &m_Regs->R7; + pRD->pCurrentContextPointers->R8 = &m_Regs->R8; + pRD->pCurrentContextPointers->R9 = &m_Regs->R9; + pRD->pCurrentContextPointers->R10 = &m_Regs->R10; + pRD->pCurrentContextPointers->R11 = &m_Regs->R11; + pRD->pCurrentContextPointers->Lr = &m_Regs->Lr; + + pRD->volatileCurrContextPointers.R0 = &m_Regs->R0; + pRD->volatileCurrContextPointers.R1 = &m_Regs->R1; + pRD->volatileCurrContextPointers.R2 = &m_Regs->R2; + pRD->volatileCurrContextPointers.R3 = &m_Regs->R3; + pRD->volatileCurrContextPointers.R12 = &m_Regs->R12; + + pRD->IsCallerContextValid = FALSE; + pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. +} + +void HijackFrame::UpdateRegDisplay(const PREGDISPLAY pRD) +{ + CONTRACTL { + NOTHROW; + GC_NOTRIGGER; + SUPPORTS_DAC; + } + CONTRACTL_END; + + pRD->IsCallerContextValid = FALSE; + pRD->IsCallerSPValid = FALSE; + + pRD->pCurrentContext->Pc = m_ReturnAddress; + pRD->pCurrentContext->Sp = PTR_TO_TADDR(m_Args) + sizeof(struct HijackArgs); + + pRD->pCurrentContext->R0 = m_Args->R0; + + pRD->pCurrentContext->R4 = m_Args->R4; + pRD->pCurrentContext->R5 = m_Args->R5; + pRD->pCurrentContext->R6 = m_Args->R6; + pRD->pCurrentContext->R7 = m_Args->R7; + pRD->pCurrentContext->R8 = m_Args->R8; + pRD->pCurrentContext->R9 = m_Args->R9; + pRD->pCurrentContext->R10 = m_Args->R10; + pRD->pCurrentContext->R11 = m_Args->R11; + + pRD->pCurrentContextPointers->R4 = &m_Args->R4; + pRD->pCurrentContextPointers->R5 = &m_Args->R5; + pRD->pCurrentContextPointers->R6 = &m_Args->R6; + pRD->pCurrentContextPointers->R7 = &m_Args->R7; + pRD->pCurrentContextPointers->R8 = &m_Args->R8; + pRD->pCurrentContextPointers->R9 = &m_Args->R9; + pRD->pCurrentContextPointers->R10 = &m_Args->R10; + pRD->pCurrentContextPointers->R11 = &m_Args->R11; + pRD->pCurrentContextPointers->Lr = NULL; + + SyncRegDisplayToCurrentContext(pRD); +} +#endif + +void PInvokeStubForHost(void) +{ + // Hosted P/Invoke is not implemented on ARM. See ARMTODO in code:CorHost2::SetHostControl. + UNREACHABLE(); +} + +class UMEntryThunk * UMEntryThunk::Decode(void *pCallback) +{ + _ASSERTE(offsetof(UMEntryThunkCode, m_code) == 0); + UMEntryThunkCode * pCode = (UMEntryThunkCode*)((ULONG_PTR)pCallback & ~THUMB_CODE); + + // We may be called with an unmanaged external code pointer instead. So if it doesn't look like one of our + // stubs (see UMEntryThunkCode::Encode below) then we'll return NULL. Luckily in these scenarios our + // caller will perform a hash lookup on successful return to verify our result in case random unmanaged + // code happens to look like ours. 
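+    // For reference, the thunk laid down by UMEntryThunkCode::Encode looks like:
+    //   f8df c008   ldr r12, [pc, #8]   ; r12 <- m_pvSecretParam (the UMEntryThunk*)
+    //   f8df f000   ldr pc, [pc]        ; jump to m_pTargetCode
+    //   <m_pTargetCode>
+    //   <m_pvSecretParam>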
+ if ((pCode->m_code[0] == 0xf8df) && + (pCode->m_code[1] == 0xc008) && + (pCode->m_code[2] == 0xf8df) && + (pCode->m_code[3] == 0xf000)) + { + return (UMEntryThunk*)pCode->m_pvSecretParam; + } + + return NULL; +} + +void UMEntryThunkCode::Encode(BYTE* pTargetCode, void* pvSecretParam) +{ + // ldr r12, [pc + 8] + m_code[0] = 0xf8df; + m_code[1] = 0xc008; + // ldr pc, [pc] + m_code[2] = 0xf8df; + m_code[3] = 0xf000; + + m_pTargetCode = (TADDR)pTargetCode; + m_pvSecretParam = (TADDR)pvSecretParam; + + FlushInstructionCache(GetCurrentProcess(),&m_code,sizeof(m_code)); +} + +///////////////////////////// UNIMPLEMENTED ////////////////////////////////// + +#ifndef DACCESS_COMPILE + +#ifndef CROSSGEN_COMPILE + + +EXTERN_C DWORD gThreadTLSIndex; +EXTERN_C DWORD gAppDomainTLSIndex; + + +EXTERN_C Object* JIT_TrialAllocSFastMP_InlineGetThread(CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C Object* JIT_BoxFastMP_InlineGetThread (CORINFO_CLASS_HANDLE type, void* unboxedData); +EXTERN_C Object* AllocateStringFastMP_InlineGetThread (CLR_I4 cch); +EXTERN_C Object* JIT_NewArr1OBJ_MP_InlineGetThread (CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); +EXTERN_C Object* JIT_NewArr1VC_MP_InlineGetThread (CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); + +EXTERN_C void JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset(); +EXTERN_C void JIT_BoxFastMP_InlineGetThread__PatchTLSOffset(); +EXTERN_C void AllocateStringFastMP_InlineGetThread__PatchTLSOffset(); +EXTERN_C void JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset(); +EXTERN_C void JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset(); + +extern "C" void STDCALL JIT_PatchedCodeStart(); +extern "C" void STDCALL JIT_PatchedCodeLast(); + +#ifndef FEATURE_IMPLICIT_TLS +static const LPVOID InlineGetThreadLocations[] = { + (PVOID)JIT_TrialAllocSFastMP_InlineGetThread__PatchTLSOffset, + (PVOID)JIT_BoxFastMP_InlineGetThread__PatchTLSOffset, + (PVOID)AllocateStringFastMP_InlineGetThread__PatchTLSOffset, + (PVOID)JIT_NewArr1VC_MP_InlineGetThread__PatchTLSOffset, + (PVOID)JIT_NewArr1OBJ_MP_InlineGetThread__PatchTLSOffset, +}; +#endif + +//EXTERN_C Object* JIT_TrialAllocSFastMP(CORINFO_CLASS_HANDLE typeHnd_); +Object* JIT_TrialAllocSFastMP(CORINFO_CLASS_HANDLE typeHnd_); +EXTERN_C Object* JIT_NewArr1OBJ_MP(CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); +EXTERN_C Object* AllocateStringFastMP(CLR_I4 cch); +EXTERN_C Object* JIT_NewArr1VC_MP(CORINFO_CLASS_HANDLE arrayTypeHnd_, INT_PTR size); +EXTERN_C Object* JIT_BoxFastMP(CORINFO_CLASS_HANDLE type, void* unboxedData); + + +EXTERN_C void JIT_GetSharedNonGCStaticBase__PatchTLSLabel(); +EXTERN_C void JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel(); +EXTERN_C void JIT_GetSharedGCStaticBase__PatchTLSLabel(); +EXTERN_C void JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel(); + +EXTERN_C void JIT_GetSharedNonGCStaticBase_SingleAppDomain(); +EXTERN_C void JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain(); +EXTERN_C void JIT_GetSharedGCStaticBase_SingleAppDomain(); +EXTERN_C void JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain(); + + +static const LPVOID InlineGetAppDomainLocations[] = { + (PVOID)JIT_GetSharedNonGCStaticBase__PatchTLSLabel, + (PVOID)JIT_GetSharedNonGCStaticBaseNoCtor__PatchTLSLabel, + (PVOID)JIT_GetSharedGCStaticBase__PatchTLSLabel, + (PVOID)JIT_GetSharedGCStaticBaseNoCtor__PatchTLSLabel +}; + +#ifndef FEATURE_IMPLICIT_TLS +void FixupInlineGetters(DWORD tlsSlot, const LPVOID * pLocations, int nLocations) +{ + STANDARD_VM_CONTRACT; + + for (int i=0; i<nLocations; i++) + { + BYTE * pInlineGetter = (BYTE 
*)PCODEToPINSTR(GetEEFuncEntryPoint(pLocations[i])); + + DWORD offset = (tlsSlot * sizeof(LPVOID) + offsetof(TEB, TlsSlots)); + + // ldr r??, [r??, #offset] + _ASSERTE_ALL_BUILDS("clr/src/VM/arm/stubs.cpp", + pInlineGetter[0] == 0x1d && + pInlineGetter[1] == 0xee && + pInlineGetter[2] == 0x50 && + pInlineGetter[5] == 0xf8 && + "Initialization failure while stomping instructions for the TLS slot offset: " + "the instruction at the given offset did not match what we expect"); + + *((WORD*)(pInlineGetter + 6)) &= 0xf000; + + _ASSERTE(offset <=4095); + *((WORD*)(pInlineGetter + 6)) |= (WORD)offset; + } +} +#endif + +void InitJITHelpers1() +{ + STANDARD_VM_CONTRACT; + +#ifndef FEATURE_IMPLICIT_TLS + + if (gThreadTLSIndex < TLS_MINIMUM_AVAILABLE) + { + FixupInlineGetters(gThreadTLSIndex, InlineGetThreadLocations, COUNTOF(InlineGetThreadLocations)); + } + + if (gAppDomainTLSIndex < TLS_MINIMUM_AVAILABLE) + { + FixupInlineGetters(gAppDomainTLSIndex, InlineGetAppDomainLocations, COUNTOF(InlineGetAppDomainLocations)); + } + + if(gThreadTLSIndex < TLS_MINIMUM_AVAILABLE || gAppDomainTLSIndex < TLS_MINIMUM_AVAILABLE) + { + FlushInstructionCache(GetCurrentProcess(), JIT_PatchedCodeStart, (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart); + } + +#if CHECK_APP_DOMAIN_LEAKS + if(g_pConfig->AppDomainLeaks()) + SetJitHelperFunction(CORINFO_HELP_ARRADDR_ST, JIT_Stelem_Ref_Portable); +#endif + + // Allocation helpers, faster but non-logging. + if (!(TrackAllocationsEnabled() + || LoggingOn(LF_GCALLOC, LL_INFO10) +#ifdef _DEBUG + || (g_pConfig->ShouldInjectFault(INJECTFAULT_GCHEAP) != 0) +#endif // _DEBUG + )) + { + + _ASSERTE(GCHeap::UseAllocationContexts()); + // If the TLS for Thread is low enough use the super-fast helpers + if (gThreadTLSIndex < TLS_MINIMUM_AVAILABLE) + { + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_TrialAllocSFastMP_InlineGetThread); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP_InlineGetThread); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_InlineGetThread); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_InlineGetThread); + + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastMP_InlineGetThread), ECall::FastAllocateString); + } + else + { +/* + SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_TrialAllocSFastMP); + SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP); + SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP); + + ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateStringFastMP), ECall::FastAllocateString); +*/ + } + } + + +#ifdef FEATURE_CORECLR + if(IsSingleAppDomain()) + { + SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, JIT_GetSharedGCStaticBase_SingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, JIT_GetSharedNonGCStaticBase_SingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, JIT_GetSharedGCStaticBaseNoCtor_SingleAppDomain); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR,JIT_GetSharedNonGCStaticBaseNoCtor_SingleAppDomain); + } + else +#endif + if (gAppDomainTLSIndex >= TLS_MINIMUM_AVAILABLE) + { + SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE, JIT_GetSharedGCStaticBase_Portable); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE, JIT_GetSharedNonGCStaticBase_Portable); + SetJitHelperFunction(CORINFO_HELP_GETSHARED_GCSTATIC_BASE_NOCTOR, JIT_GetSharedGCStaticBaseNoCtor_Portable); 
+ SetJitHelperFunction(CORINFO_HELP_GETSHARED_NONGCSTATIC_BASE_NOCTOR,JIT_GetSharedNonGCStaticBaseNoCtor_Portable); + } +#endif +} + +extern "C" Object *SetAppDomainInObject(Object *pObject) +{ + pObject->SetAppDomain(); + return pObject; +} + +// +64 stack-based arguments here +// -- MulticastFrame end +// +48 r0-r3 argument registers +// +44 lr return address +// +40 fp frame pointer +// +12 r4-r10 callee saved registers +// +8 datum (typically a MethodDesc*) +// +4 m_Next +// +0 the frame vptr +// -- MulticastFrame start +// -4 gs cookie +// -... floating point argument registers +void StubLinkerCPU::EmitMulticastInvoke(UINT_PTR hash) +{ + //Decode Multicast Delegate hash + unsigned int numStackBytes = hash >> 8; + _ASSERTE(numStackBytes <= 0x7fff); + + unsigned int numFPRegs = (hash & 0xf8) >> 3; + _ASSERTE(numFPRegs <= 16); + + unsigned int numGenRegs = hash & 0x7; + _ASSERTE(numGenRegs <= 4); + + DWORD offsetOfFPRegs = 0; + + DWORD cbStackFrame = numStackBytes; + if (numFPRegs) + { + cbStackFrame = ALIGN_UP(cbStackFrame, 8); + offsetOfFPRegs = cbStackFrame; + cbStackFrame += 4 * numFPRegs; + } + cbStackFrame += sizeof(GSCookie) + sizeof(MulticastFrame); + cbStackFrame = ALIGN_UP(cbStackFrame, 8); + DWORD cbStackFrameWithoutSavedRegs = cbStackFrame - (13 * 4); // r0-r11,lr + + // Prolog: + ThumbEmitProlog(8, // Save r4-r11,lr (count doesn't include lr) + cbStackFrameWithoutSavedRegs, // Additional space in the stack frame required + TRUE); // Push argument registers + + DWORD offsetOfFrame = cbStackFrame - sizeof(MulticastFrame); + + // Move the MethodDesc* we're calling to r12. + // ldr r12, [r0, #offsetof(DelegateObject, _methodPtrAux)] + ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux()); + + // Initialize MulticastFrame::m_pMD to the MethodDesc* we're calling + // str r12, [sp + #(offsetOfFrame + offsetof(MulticastFrame, m_pMD))] + ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, offsetOfFrame + MulticastFrame::GetOffsetOfDatum()); + + if (numFPRegs) + { + ThumbEmitAdd(ThumbReg(4), thumbRegSp, offsetOfFPRegs); + + // save floating point arguments at offsetOfFPRegs + //vstm{IA} R4,{s0-s(numFPRegs -1)} + Emit16(0xec84); + Emit16(0x0a00 | (WORD)numFPRegs); + } + + // Initialize and link the MulticastFrame and associated GS cookie. + EmitStubLinkFrame(MulticastFrame::GetMethodFrameVPtr(), offsetOfFrame, MulticastFrame::GetOffsetOfTransitionBlock()); + + //r7 as counter. Initialize it to 0. + // mov r7, 0 + ThumbEmitMovConstant(ThumbReg(7), 0); + + //initialize r9 to _invocationCount + ThumbEmitLoadRegIndirect(ThumbReg(9), ThumbReg(0), DelegateObject::GetOffsetOfInvocationCount()); + + CodeLabel *pLoopLabel = NewCodeLabel(); + CodeLabel *pEndLoopLabel = NewCodeLabel(); + + //loop: + EmitLabel(pLoopLabel); + + // cmp r7, r9 + ThumbEmitCmpReg(ThumbReg(7), ThumbReg(9)); + + // if equal goto endloop + // beq endloop + ThumbEmitCondFlagJump(pEndLoopLabel, 0); + + UINT32 count = 0; + if(numStackBytes) + { + //r1 = pos for stack args in Frame + ThumbEmitAdd(ThumbReg(1), ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs()); + + //r2 = stack pos for args of calling func + ThumbEmitMovRegReg(ThumbReg(2), thumbRegSp); + + // ..move stack args.. 
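+        // (The copy below is emitted fully unrolled: numStackBytes/4 repetitions
+        // of the pair "ldr r0, [r1], #4" / "str r0, [r2], #4".)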
+ _ASSERTE(numStackBytes%4 == 0); + while (count != numStackBytes) + { + ThumbEmitLoadIndirectPostIncrement(ThumbReg(0), ThumbReg(1), 4); + ThumbEmitStoreIndirectPostIncrement(ThumbReg(0), ThumbReg(2), 4); + count += 4; + } + } + + count = 1; + while(count < numGenRegs) + { + ThumbEmitLoadRegIndirect(ThumbReg(count), ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters() + count*4); + count++; + } + + if(numFPRegs) + { + ThumbEmitAdd(ThumbReg(0), thumbRegSp, offsetOfFPRegs); + //vldm{IA}.32 R0, s0-s(numFPRegs-1) + Emit16(0xec90); + Emit16(0x0a00 | (WORD)numFPRegs); + } + + //ldr r0, [r4+0x30] // get the first argument + ThumbEmitLoadRegIndirect(ThumbReg(0),ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgumentRegisters()); + + // ldr r6, [r0+0x14] //invocationList + ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(0), DelegateObject::GetOffsetOfInvocationList()); + + // r6 - address of first delegate in invocation list + // add r6,r6,0xC + ThumbEmitAdd(ThumbReg(6), ThumbReg(6), PtrArray::GetDataOffset()); + + //ldr r8,[r6+r7*4] //get delegate object + ThumbEmitLoadOffsetScaledReg(ThumbReg(8), ThumbReg(6), ThumbReg(7), 2); + + // ldr r0, [r8+0x04] //_target from the delegate + ThumbEmitLoadRegIndirect(ThumbReg(0), ThumbReg(8), DelegateObject::GetOffsetOfTarget()); + + // ldr r8, [r8+0xC] // methodPtr from the delegate + ThumbEmitLoadRegIndirect(ThumbReg(8), ThumbReg(8), DelegateObject::GetOffsetOfMethodPtr()); + + //call delegate + ThumbEmitCallRegister(ThumbReg(8)); + + //increment counter + ThumbEmitAdd(ThumbReg(7), ThumbReg(7), 1); + + // The debugger may need to stop here, so grab the offset of this code. + EmitPatchLabel(); + + //goto loop + ThumbEmitNearJump(pLoopLabel); + + //endloop: + EmitLabel(pEndLoopLabel); + + + //At this point of the stub: + //r4 must point to Frame + //and r5 must be current Thread* + + EmitStubUnlinkFrame(); + + // Epilog + ThumbEmitEpilog(); +} + +void StubLinkerCPU::EmitSecureDelegateInvoke(UINT_PTR hash) +{ + //Decode Multicast Delegate hash + unsigned int numStackBytes = hash >> 8; + _ASSERTE(numStackBytes <= 0x7fff); + + DWORD cbStackFrame = numStackBytes + sizeof(GSCookie) + sizeof(SecureDelegateFrame); + cbStackFrame = ALIGN_UP(cbStackFrame, 8); + DWORD cbStackFrameWithoutSavedRegs = cbStackFrame - (13 * 4); // r0-r11,lr + + // Prolog: + ThumbEmitProlog(8, // Save r4-r11,lr (count doesn't include lr) + cbStackFrameWithoutSavedRegs, // Additional space in the stack frame required + TRUE); // Push argument registers + + DWORD offsetOfFrame = cbStackFrame - sizeof(SecureDelegateFrame); + + // Move the MethodDesc* we're calling to r12. + // ldr r12, [r0, #offsetof(DelegateObject, _invocationCount)] + ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfInvocationCount()); + + // Initialize SecureDelegateFrame::m_pMD to the MethodDesc* we're calling + // str r12, [sp + #(offsetOfFrame + offsetof(SecureDelegateFrame, m_pMD))] + ThumbEmitStoreRegIndirect(ThumbReg(12), thumbRegSp, offsetOfFrame + SecureDelegateFrame::GetOffsetOfDatum()); + + // Initialize and link the SecureDelegateFrame and associated GS cookie. + EmitStubLinkFrame(SecureDelegateFrame::GetMethodFrameVPtr(), offsetOfFrame, SecureDelegateFrame::GetOffsetOfTransitionBlock()); + + // At this point: + // r0 : secure delegate + // r4 : SecureDelegateFrame * + // r5 : Thread * + + if (numStackBytes) + { + // Copy stack based arguments from the calling frame into this one. 
Use the following registers:
+        //  r6 : pointer to source arguments
+        //  r7 : pointer to destination arguments
+        //  r8 : temporary storage during copy operation
+
+        // add r6, r4, #MulticastFrame::GetOffsetOfArgs()
+        ThumbEmitAdd(ThumbReg(6), ThumbReg(4), MulticastFrame::GetOffsetOfTransitionBlock() + TransitionBlock::GetOffsetOfArgs());
+
+        // mov r7, sp
+        ThumbEmitMovRegReg(ThumbReg(7), thumbRegSp);
+
+        // Unrolled loop to copy the stack based arguments. Might want to consider a second path with a loop
+        // for large argument lists if anyone complains about this.
+        _ASSERTE((numStackBytes % 4) == 0);
+        for (unsigned int i = 0; i < numStackBytes; i += 4)
+        {
+            // Read one 4-byte value from the source stack and copy it to the new stack, post-incrementing
+            // both source and destination as we go.
+            //  ldr r8, [r6], #4
+            //  str r8, [r7], #4
+            ThumbEmitLoadIndirectPostIncrement(ThumbReg(8), ThumbReg(6), 4);
+            ThumbEmitStoreIndirectPostIncrement(ThumbReg(8), ThumbReg(7), 4);
+        }
+    }
+
+    // Stack-based arguments are copied. Floating point argument registers and r1-r3 are all still correct.
+    // All we need to do now is calculate the real value for r0 and the target address. Secure delegates wrap
+    // an inner delegate (kept in _invocationList). We retrieve this inner delegate and then perform the usual
+    // delegate invocation pattern on that.
+
+    // Get "real" delegate.
+    //  ldr r0, [r0, #offsetof(DelegateObject, _invocationList)]
+    ThumbEmitLoadRegIndirect(ThumbReg(0), ThumbReg(0), DelegateObject::GetOffsetOfInvocationList());
+
+    // Load the destination address from the inner delegate.
+    //  ldr r12, [r0, #offsetof(DelegateObject, _methodPtr)]
+    ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtr());
+
+    // This is only required for unbound delegates which use VSD stubs, but it does no harm if done unconditionally.
+    //  add r4, r0+#offsetof(DelegateObject, _methodPtrAux) ; // r4 now contains indirection cell
+    ThumbEmitAdd(ThumbReg(4), ThumbReg(0), DelegateObject::GetOffsetOfMethodPtrAux());
+
+    // Replace the delegate reference with the object cached as the delegate's target.
+    //  ldr r0, [r0, #offsetof(DelegateObject, _target)]
+    ThumbEmitLoadRegIndirect(ThumbReg(0), ThumbReg(0), DelegateObject::GetOffsetOfTarget());
+
+    // Perform the call.
+    //  blx r12
+    ThumbEmitCallRegister(ThumbReg(12));
+
+    // Restore the frame pointer in r4.
+    ThumbEmitAdd(ThumbReg(4), thumbRegSp, offsetOfFrame);
+
+    // Unlink SecureDelegateFrame. This requires the frame pointer in r4 and the thread pointer in r5.
+    EmitStubUnlinkFrame();
+
+    // Epilog
+    ThumbEmitEpilog();
+}
+
+// The function expects r4 to point to the frame
+// and r5 to hold the current Thread*.
+void StubLinkerCPU::EmitStubUnlinkFrame()
+{
+#ifdef _DEBUG
+    // EmitStubUnlinkFrame is emitted just before the epilog.
+    // Thus, at this point, all other callee-saved registers
+    // could be used since we are going to restore them anyway
+    // via epilog execution.
+
+    // Ensure that GSCookie is valid
+    //
+    // ldr r6, [r4-4]; Load the value of GSCookie
+    ThumbEmitSub(ThumbReg(6), ThumbReg(4), 4);
+    ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(6), 0);
+
+    // mov r7, s_gsCookie
+    ThumbEmitMovConstant(ThumbReg(7), GetProcessGSCookie());
+
+    // cmp r6, r7 ; Are the GSCookie values in sync?
+    ThumbEmitCmpReg(ThumbReg(6), ThumbReg(7));
+
+    CodeLabel *pAllDoneLabel = NewCodeLabel();
+
+    // beq AllDone; yes, GSCookie is good.
+    ThumbEmitCondFlagJump(pAllDoneLabel, 0);
+
+    // If we are here, then GSCookie was bad.
+    // Call into DoJITFailFast.
+    //
+    // mov r12, DoJITFailFast
+    ThumbEmitMovConstant(ThumbReg(12), (int)DoJITFailFast);
+    // blx r12
+    ThumbEmitCallRegister(ThumbReg(12));
+    // Emit a breakpoint - we are not expected to come here at all
+    // if we performed a FailFast.
+    ThumbEmitBreakpoint();
+
+    //AllDone:
+    EmitLabel(pAllDoneLabel);
+#endif // _DEBUG
+
+    // Unlink the MulticastFrame.
+    //  ldr r6, [r4 + #offsetof(MulticastFrame, m_Next)]
+    //  str r6, [r5 + #offsetof(Thread, m_pFrame)]
+    ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(4), Frame::GetOffsetOfNextLink());
+    ThumbEmitStoreRegIndirect(ThumbReg(6), ThumbReg(5), offsetof(Thread, m_pFrame));
+
+}
+
+// pFrameVptr = vtable ptr of Frame
+// offsetOfFrame = Frame offset in bytes from sp
+// After this method: r4 points to the Frame on the stack
+// and r5 holds the current Thread*.
+void StubLinkerCPU::EmitStubLinkFrame(TADDR pFrameVptr, int offsetOfFrame, int offsetOfTransitionBlock)
+{
+    // Initialize r4 to point to where we start filling the frame.
+    ThumbEmitAdd(ThumbReg(4), thumbRegSp, offsetOfFrame - sizeof(GSCookie));
+
+    // Write the initial GS cookie value
+    //  mov r5, s_gsCookie
+    //  str r5, [r4]
+    ThumbEmitMovConstant(ThumbReg(5), s_gsCookie);
+    ThumbEmitStoreIndirectPostIncrement(ThumbReg(5), ThumbReg(4), 4);
+
+    // Initialize the vtable pointer.
+    //  mov r5, #vfptr
+    //  str r5, [r4 + #offsetof(Frame, _vfptr)]
+    ThumbEmitMovConstant(ThumbReg(5), pFrameVptr);
+    ThumbEmitStoreRegIndirect(ThumbReg(5), ThumbReg(4), 0);
+
+    // Link the frame to the thread's frame chain.
+    //  r5 <- current Thread*
+    //  ldr r6, [r5 + #offsetof(Thread, m_pFrame)]
+    //  str r6, [r4 + #offsetof(MulticastFrame, m_Next)]
+    //  str r4, [r5 + #offsetof(Thread, m_pFrame)]
+
+#ifdef FEATURE_IMPLICIT_TLS
+    TLSACCESSMODE mode = TLSACCESS_GENERIC;
+#else
+    TLSACCESSMODE mode = GetTLSAccessMode(GetThreadTLSIndex());
+#endif
+    ThumbEmitGetThread(mode, ThumbReg(5));
+    if (mode == TLSACCESS_GENERIC)
+    {
+        // Reload argument registers that could have been corrupted by the call.
+        for (int reg = 0; reg < 4; reg++)
+            ThumbEmitLoadRegIndirect(ThumbReg(reg), ThumbReg(4),
+                offsetOfTransitionBlock + TransitionBlock::GetOffsetOfArgumentRegisters() + offsetof(ArgumentRegisters, r[reg]));
+    }
+
+    ThumbEmitLoadRegIndirect(ThumbReg(6), ThumbReg(5), Thread::GetOffsetOfCurrentFrame());
+    ThumbEmitStoreRegIndirect(ThumbReg(6), ThumbReg(4), Frame::GetOffsetOfNextLink());
+    ThumbEmitStoreRegIndirect(ThumbReg(4), ThumbReg(5), Thread::GetOffsetOfCurrentFrame());
+}
+
+#endif // CROSSGEN_COMPILE
+
+void StubLinkerCPU::ThumbEmitNearJump(CodeLabel *target)
+{
+    WRAPPER_NO_CONTRACT;
+    EmitLabelRef(target, reinterpret_cast<ThumbNearJump&>(gThumbNearJump), 0xe);
+}
+
+void StubLinkerCPU::ThumbEmitCondFlagJump(CodeLabel *target, UINT cond)
+{
+    WRAPPER_NO_CONTRACT;
+    EmitLabelRef(target, reinterpret_cast<ThumbNearJump&>(gThumbNearJump), cond);
+}
+
+void StubLinkerCPU::ThumbEmitCondRegJump(CodeLabel *target, BOOL nonzero, ThumbReg reg)
+{
+    WRAPPER_NO_CONTRACT;
+    _ASSERTE(reg <= 7);
+    UINT variation = reg;
+    if(nonzero)
+        variation = variation | 0x8;
+    EmitLabelRef(target, reinterpret_cast<ThumbCondJump&>(gThumbCondJump), variation);
+}
+
+unsigned int StubLinkerCPU::HashMulticastInvoke(MetaSig *pSig)
+{
+    // Generate a hash key as follows:
+    // Bit0-2 : num of general purpose registers used
+    // Bit3-7 : num of FP regs used (counting in terms of s0,s1...)
+ // Bit8-22 : num of stack bytes used + + ArgIterator delegateCallConv(pSig); + + UINT numStackBytes = delegateCallConv.SizeOfArgStack(); + + if (numStackBytes > 0x7FFF) + COMPlusThrow(kNotSupportedException, W("NotSupported_TooManyArgs")); + + int cGenReg = 1; // r0 is always used for this pointer + int cFPReg = 0; + + // if it has a return buffer argument r1 is also used + if(delegateCallConv.HasRetBuffArg()) + cGenReg = 2; + + int argOffset; + while ((argOffset = delegateCallConv.GetNextOffset()) != TransitionBlock::InvalidOffset) + { + ArgLocDesc currArgLoc; + delegateCallConv.GetArgLoc(argOffset, &currArgLoc); + + if(currArgLoc.m_idxGenReg != -1) + cGenReg = currArgLoc.m_idxGenReg + currArgLoc.m_cGenReg; + + if(currArgLoc.m_idxFloatReg != -1) + cFPReg = currArgLoc.m_idxFloatReg + currArgLoc.m_cFloatReg; + } + + // only r0-r3 can be used for arguments + _ASSERTE(cGenReg <= 4); + + // only s0-s15 can be used for arguments + _ASSERTE(cFPReg <= 16); + + return (numStackBytes << 8 | cFPReg << 3 | cGenReg); +} + +void StubLinkerCPU::ThumbCopyOneTailCallArg(UINT * pnSrcAlign, const ArgLocDesc * pArgLoc, UINT * pcbStackSpace) +{ + if (pArgLoc->m_fRequires64BitAlignment && (*pnSrcAlign & 1)) { + // ADD R0, #4 + ThumbEmitIncrement(ThumbReg(0), 4); + *pnSrcAlign = 0; + } + + // Integer register arguments + if (pArgLoc->m_cGenReg > 0) { + int iReg = pArgLoc->m_idxGenReg; + int maxReg = iReg + pArgLoc->m_cGenReg; + while (iReg + 2 <= maxReg) { + // LDM r0!, {r4,r5} ; Post incremented loads (2 bytes) + ThumbEmitLoadStoreMultiple(ThumbReg(0), true, ThumbReg(4).Mask() | ThumbReg(5).Mask()); + // STR r4, [R1, #offset of arg reg] ; (2 bytes) + ThumbEmitStoreRegIndirect(ThumbReg(4), ThumbReg(1), offsetof(T_CONTEXT, R0) + (iReg * sizeof(DWORD))); + iReg++; + // STR r5, [R1, #offset of arg reg] ; (2 bytes) + ThumbEmitStoreRegIndirect(ThumbReg(5), ThumbReg(1), offsetof(T_CONTEXT, R0) + (iReg * sizeof(DWORD))); + iReg++; + } + if (iReg < maxReg) { + // LDR r3, [R0], #+4 ; Post incremented load (4 bytes) + ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4); + (*pnSrcAlign)++; + + // STR r3, [R1, #offset of arg reg] ; (2 bytes) + ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, R0) + (iReg * sizeof(DWORD))); + } + } + if (pArgLoc->m_cFloatReg > 0) { + int iReg = pArgLoc->m_idxFloatReg; + int maxReg = iReg + pArgLoc->m_cFloatReg; + while (iReg + 2 <= maxReg) { + // LDM r0!, {r4,r5} ; Post incremented loads (2 bytes) + ThumbEmitLoadStoreMultiple(ThumbReg(0), true, ThumbReg(4).Mask() | ThumbReg(5).Mask()); + // STR r4, [R1, #offset of arg reg] ; (2 bytes) + ThumbEmitStoreRegIndirect(ThumbReg(4), ThumbReg(1), offsetof(T_CONTEXT, S) + (iReg * sizeof(DWORD))); + iReg++; + // STR r5, [R1, #offset of arg reg] ; (2 bytes) + ThumbEmitStoreRegIndirect(ThumbReg(5), ThumbReg(1), offsetof(T_CONTEXT, S) + (iReg * sizeof(DWORD))); + iReg++; + } + if (iReg < maxReg) { + // LDR r3, [R0], #+4 ; Post incremented load (4 bytes) + ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4); + (*pnSrcAlign)++; + + // STR r3, [R1, #offset of arg reg] ; (2 bytes) + ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, S) + (iReg * sizeof(DWORD))); + } + } + + if (pArgLoc->m_cStack > 0) { + // Copy to the stack + // Be careful because this can get big and ugly. 
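+        // Worked example of the strategy below: 10 stack slots are moved by the
+        // 4-slot LDM/STM loop (r3 counts 8, 4, 0, copying 8 slots) followed by one
+        // 2-register LDM/STM pair for the remainder; 3 slots skip the loop entirely
+        // and use a single 3-register LDM/STM.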
+        _ASSERTE(*pcbStackSpace <= (pArgLoc->m_idxStack * sizeof(DWORD)));
+
+        // Pad the output
+        if (*pcbStackSpace < (pArgLoc->m_idxStack * sizeof(DWORD)))
+        {
+            const UINT cbPad = ((pArgLoc->m_idxStack * sizeof(DWORD)) - *pcbStackSpace);
+            _ASSERTE(cbPad == 4);
+            // ADD R2, #4
+            ThumbEmitIncrement(ThumbReg(2), cbPad);
+            *pcbStackSpace += cbPad;
+        }
+        int cStack = pArgLoc->m_cStack;
+        *pcbStackSpace += (cStack * sizeof(DWORD));
+
+        // Now start the copying
+        if (cStack > 8) {
+            // Loop to copy in 16-byte chunks per iteration.
+            // Sacrifice r3 for the loop counter.
+            ThumbEmitMovConstant(ThumbReg(3), pArgLoc->m_cStack & ~3);
+            // LoopLabel:
+            CodeLabel *pLoopLabel = NewCodeLabel();
+            EmitLabel(pLoopLabel);
+            const WORD mask = ThumbReg(4).Mask() | ThumbReg(5).Mask() | ThumbReg(6).Mask() | ThumbReg(7).Mask();
+            // LDM r0!, {r4,r5,r6,r7} ; Post incremented loads (2 bytes)
+            ThumbEmitLoadStoreMultiple(ThumbReg(0), true, mask);
+            // STM r2!, {r4,r5,r6,r7} ; Post incremented stores (2 bytes)
+            ThumbEmitLoadStoreMultiple(ThumbReg(2), false, mask);
+            // SUBS r3, #4
+            Emit16((WORD)(0x3800 | (ThumbReg(3) << 8) | 4));
+            // BNE LoopLabel
+            ThumbEmitCondFlagJump(pLoopLabel, thumbCondNe.cond);
+
+            cStack = cStack % 4;
+            // Now deal with the tail if any
+        }
+        _ASSERTE(cStack <= 8);
+
+        while (cStack > 1) {
+            _ASSERTE(cStack >= 2);
+            WORD mask = ThumbReg(4).Mask() | ThumbReg(5).Mask();
+            cStack -= 2;
+            if (cStack > 0) {
+                mask |= ThumbReg(6).Mask();
+                cStack--;
+                // Instead of copying 4 slots and leaving a single-slot remainder (which
+                // would require us to use the bigger opcodes for the tail), only copy 3
+                // slots this loop, saving 2 for next time. :)
+                if (cStack == 1 || cStack > 2) {
+                    mask |= ThumbReg(7).Mask();
+                    cStack--;
+                }
+                else {
+                    // We're reading an odd amount from the stack
+                    (*pnSrcAlign)++;
+                }
+            }
+
+            // LDM r0!, {r4,r5,r6,r7} ; Post incremented loads (2 bytes)
+            ThumbEmitLoadStoreMultiple(ThumbReg(0), true, mask);
+            // STM r2!, {r4,r5,r6,r7} ; Post incremented stores (2 bytes)
+            ThumbEmitLoadStoreMultiple(ThumbReg(2), false, mask);
+            _ASSERTE((cStack == 0) || (cStack >= 2));
+        }
+        if (cStack > 0) {
+            _ASSERTE(cStack == 1);
+            // We're reading an odd amount from the stack
+            (*pnSrcAlign)++;
+            // LDR r12, [R0], #+4 ; Post incremented load (4 bytes)
+            ThumbEmitLoadIndirectPostIncrement(ThumbReg(12), ThumbReg(0), 4);
+            // STR r12, [R2], #+4 ; Post incremented store (4 bytes)
+            ThumbEmitStoreIndirectPostIncrement(ThumbReg(12), ThumbReg(2), 4);
+        }
+    }
+}
+
+
+Stub * StubLinkerCPU::CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
+                                                  CorInfoHelperTailCallSpecialHandling flags)
+{
+    STANDARD_VM_CONTRACT;
+
+    CPUSTUBLINKER sl;
+    CPUSTUBLINKER* pSl = &sl;
+
+    // Generates a function that looks like this:
+    // size_t CopyArguments(va_list args,     (R0)
+    //                      CONTEXT *pCtx,    (R1)
+    //                      DWORD *pvStack,   (R2)
+    //                      size_t cbStack)   (R3)
+    // {
+    //     if (pCtx != NULL) {
+    //         foreach (arg in args) {
+    //             copy into pCtx or pvStack
+    //         }
+    //     }
+    //     return <size of stack needed>;
+    // }
+    //
+
+    Module * module = GetModule(pSig->scope);
+    Instantiation classInst((TypeHandle*)pSig->sigInst.classInst, pSig->sigInst.classInstCount);
+    Instantiation methodInst((TypeHandle*)pSig->sigInst.methInst, pSig->sigInst.methInstCount);
+    SigTypeContext typeCtxt(classInst, methodInst);
+
+    // The -8 is because R11 points at the pushed {R11, LR} pair, and it is aligned.
+    // This is the magic distance between the frame pointer and the Frame.
+    const UINT cbFrameOffset = (sizeof(FrameWithCookie<TailCallFrame>) - 8);
+
+    bool fNeedExtraRegs = false;
+    UINT copyEstimate = 0;
+    {
+        // Do a quick scan of the arguments, looking for ones that will probably
+        // need extra registers, and estimate the size of the method
+        if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG)
+            copyEstimate += 6;
+
+        if (pSig->hasThis())
+            copyEstimate += 6;
+
+        MetaSig msig(pSig->pSig, pSig->cbSig, module, &typeCtxt);
+        if (pSig->hasTypeArg())
+            msig.SetHasParamTypeArg();
+        ArgIterator argPlacer(&msig);
+
+        if (argPlacer.HasRetBuffArg()) {
+            copyEstimate += 24;
+        }
+
+        if (pSig->hasTypeArg() || pSig->isVarArg())
+            copyEstimate += 6;
+
+        int argOffset;
+        while ((argOffset = argPlacer.GetNextOffset()) != TransitionBlock::InvalidOffset)
+        {
+            ArgLocDesc argLoc;
+            argPlacer.GetArgLoc(argOffset, &argLoc);
+
+            if (argLoc.m_cStack > 1 || argLoc.m_cGenReg > 1 || argLoc.m_cFloatReg > 1) {
+                fNeedExtraRegs = true;
+            }
+            else {
+                copyEstimate += 8;
+            }
+        }
+    }
+
+    if (fNeedExtraRegs) {
+        // Inject a proper prolog
+        // push {r4-r7,lr}
+        pSl->ThumbEmitProlog(4, 0, false);
+    }
+
+    CodeLabel *pNullLabel = pSl->NewCodeLabel();
+
+    if (!fNeedExtraRegs && copyEstimate < 100) {
+        // The real range of CBZ is 0-126, but it's hard to estimate the code size
+        // that precisely, and we don't want to do that much work just to save a few bytes
+
+        // CBZ R1, NullLabel
+        pSl->ThumbEmitCondRegJump(pNullLabel, false, ThumbReg(1));
+    }
+    else {
+        // CMP R1, #0 ; T1 encoding
+        pSl->Emit16((WORD)(0x2900));
+
+        // BEQ NullLabel
+        pSl->ThumbEmitCondFlagJump(pNullLabel, thumbCondEq.cond);
+    }
+
+    UINT cbStackSpace = 0;
+    UINT cbReturnBufferSpace = 0;
+    UINT nSrcAlign = 0;
+
+    if (flags & CORINFO_TAILCALL_STUB_DISPATCH_ARG) {
+        // This is set for stub dispatch or 'thisInSecretRegister'.
+        // The JIT placed an extra argument in the list that needs to
+        // get shoved into R4, and not counted.
+        // pCtx->R4 = va_arg(args, DWORD);
+
+        // LDR r3, [R0], #+4 ; Post incremented load (4 bytes)
+        pSl->ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4);
+        // STR r3, [R1, #offset of R4] ; (2 bytes)
+        pSl->ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, R4));
+        nSrcAlign++;
+    }
+
+
+    MetaSig msig(pSig->pSig, pSig->cbSig, module, &typeCtxt);
+    if (pSig->hasTypeArg())
+        msig.SetHasParamTypeArg();
+    ArgIterator argPlacer(&msig);
+    ArgLocDesc argLoc;
+
+    // First comes the 'this' pointer
+    if (argPlacer.HasThis()) {
+        argPlacer.GetThisLoc(&argLoc);
+        pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
+    }
+
+    // Next comes the return buffer
+    if (argPlacer.HasRetBuffArg()) {
+        // We always reserve space for the return buffer, but we never zero it out,
+        // and we never report it. Thus the callee shouldn't do RVO and expect
+        // to be able to read GC pointers from it.
+        // If the passed-in return buffer is already pointing above the frame,
+        // then we need to pass it along (so it will get passed out).
+        // Otherwise we assume the caller is returning void, so we just pass in
+        // dummy space to be overwritten.
+
+        argPlacer.GetRetBuffArgLoc(&argLoc);
+        _ASSERTE(argLoc.m_cStack == 0);
+        _ASSERTE(argLoc.m_cFloatReg == 0);
+        _ASSERTE(argLoc.m_cGenReg == 1);
+
+        // Grab some space from the top of the frame and pass that in as a dummy
+        // buffer if needed. Align to an 8-byte boundary (after taking the Frame into account).
+        // Do this by adding the Frame size, aligning, then removing the Frame size...
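+        // Worked example with hypothetical numbers: if cbFrameOffset were 36
+        // and the return value 12 bytes, then
+        //     cbUsed = ((12 + 36 + 0x7) & ~0x7) - 36 = 48 - 36 = 12,
+        // so cbUsed covers the value while cbUsed + cbFrameOffset stays
+        // 8-byte aligned.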
+        _ASSERTE((pSig->retType == CORINFO_TYPE_REFANY) || (pSig->retType == CORINFO_TYPE_VALUECLASS));
+        TypeHandle th(pSig->retTypeClass);
+        UINT cbUsed = ((th.GetSize() + cbFrameOffset + 0x7) & ~0x7) - cbFrameOffset;
+        _ASSERTE(cbUsed >= th.GetSize());
+        cbReturnBufferSpace += cbUsed;
+
+        // LDR r3, [R0], #+4 ; Post incremented load (4 bytes)
+        pSl->ThumbEmitLoadIndirectPostIncrement(ThumbReg(3), ThumbReg(0), 4);
+
+        // LDR r12, [R1, #offset of R11] ; (2 bytes)
+        pSl->ThumbEmitLoadRegIndirect(ThumbReg(12), ThumbReg(1), offsetof(T_CONTEXT, R11));
+
+        // CMP r3, r12 ; (2 bytes)
+        pSl->ThumbEmitCmpReg(ThumbReg(3), ThumbReg(12));
+
+        CodeLabel *pSkipLabel = pSl->NewCodeLabel();
+        // BHI SkipLabel ; skip if R3 > R12 unsigned (2 bytes)
+        pSl->ThumbEmitCondFlagJump(pSkipLabel, thumbCondHi.cond);
+
+        // Also check the lower bound of the stack in case the return buffer is
+        // on the GC heap and the GC heap is below the stack
+        // CMP r3, sp ; (2 bytes)
+        pSl->ThumbEmitCmpReg(ThumbReg(3), thumbRegSp);
+        // BLO SkipLabel ; skip if r3 < sp unsigned (2 bytes)
+        pSl->ThumbEmitCondFlagJump(pSkipLabel, thumbCondCc.cond);
+
+        // If the caller is expecting us to simulate a return buffer for the callee,
+        // pass that pointer in now, by subtracting from R11 the space for the Frame
+        // and the space for the return buffer.
+        UINT offset = cbUsed + cbFrameOffset;
+        if (offset < 4096) {
+            // SUB r3, r12, #offset ; (4 bytes)
+            pSl->ThumbEmitSub(ThumbReg(3), ThumbReg(12), offset);
+        }
+        else {
+            offset = UINT(-int(offset)); // Silence the signed/unsigned conversion warning
+            // MOVW/MOVT (4-8 bytes)
+            // ADD r3, r12 ; (2 bytes)
+            pSl->ThumbEmitAdd(ThumbReg(3), ThumbReg(12), offset);
+        }
+        // SkipLabel:
+        pSl->EmitLabel(pSkipLabel);
+        // STR r3, [R1, #offset of arg reg] ; (2 bytes)
+        pSl->ThumbEmitStoreRegIndirect(ThumbReg(3), ThumbReg(1), offsetof(T_CONTEXT, R0) + (argLoc.m_idxGenReg * sizeof(DWORD)));
+
+        nSrcAlign++;
+    }
+
+    // Generics Instantiation Parameter
+    if (pSig->hasTypeArg()) {
+        argPlacer.GetParamTypeLoc(&argLoc);
+        pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
+    }
+
+    // VarArgs Cookie Parameter
+    if (pSig->isVarArg()) {
+        argPlacer.GetVASigCookieLoc(&argLoc);
+        pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
+    }
+
+    // Now for *all* the 'real' arguments
+    int argOffset;
+    while ((argOffset = argPlacer.GetNextOffset()) != TransitionBlock::InvalidOffset)
+    {
+        argPlacer.GetArgLoc(argOffset, &argLoc);
+
+        pSl->ThumbCopyOneTailCallArg(&nSrcAlign, &argLoc, &cbStackSpace);
+    }
+
+    // Now that we are done moving arguments, add back in the stack space we
+    // reserved for the return buffer.
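+    // At this point the generated code has walked the entire va_list: every
+    // argument register slot in *pCtx is populated and cbStackSpace counts the
+    // outgoing stack bytes actually copied. The adjustments below keep the
+    // total (cbStackSpace + cbFrameOffset) 8-byte aligned.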
+    cbStackSpace += cbReturnBufferSpace;
+
+    // Keep the stack space 8-byte aligned
+    if ((cbStackSpace + cbFrameOffset) & 7) {
+        cbStackSpace += 4;
+    }
+    _ASSERTE(((cbStackSpace + cbFrameOffset) & 7) == 0);
+
+    CodeLabel *pReturnLabel = pSl->NewCodeLabel();
+    // B ReturnLabel
+    pSl->ThumbEmitNearJump(pReturnLabel);
+
+    // NullLabel:
+    pSl->EmitLabel(pNullLabel);
+    // MOVW/MOVT r0, 0 ; No GCLayout info
+    pSl->ThumbEmitMovConstant(ThumbReg(0), 0);
+    // STR r0, [r3]
+    pSl->ThumbEmitStoreRegIndirect(ThumbReg(0), ThumbReg(3), 0);
+
+    // ReturnLabel:
+    pSl->EmitLabel(pReturnLabel);
+
+    // MOVW/MOVT r0, #cbStackSpace
+    pSl->ThumbEmitMovConstant(ThumbReg(0), cbStackSpace);
+
+    if (fNeedExtraRegs) {
+        // Inject a proper epilog to match the prolog above
+        // pop {r4-r7,pc}
+        pSl->ThumbEmitEpilog();
+    }
+    else {
+        // bx lr
+        pSl->ThumbEmitJumpRegister(thumbRegLr);
+    }
+
+
+    return pSl->Link();
+}
+
+
+VOID ResetCurrentContext()
+{
+    LIMITED_METHOD_CONTRACT;
+}
+#endif // !DACCESS_COMPILE
+
+#if defined(FEATURE_REMOTING) && !defined(CROSSGEN_COMPILE)
+
+#ifndef DACCESS_COMPILE
+PCODE CTPMethodTable::CreateThunkForVirtualMethod(DWORD dwSlot, BYTE *startaddr)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        PRECONDITION(CheckPointer(startaddr));
+    }
+    CONTRACTL_END;
+
+    WORD *pCode = (WORD*)((ULONG_PTR)startaddr);
+
+    // The slot literal is split into four pieces in the mov instruction:
+    // imm4:i:imm3:imm8
+    _ASSERTE(FitsInU2(dwSlot));
+    WORD imm4 = ((WORD)dwSlot & 0xf000) >> 12;
+    WORD i = ((WORD)dwSlot & 0x0800) >> 11;
+    WORD imm3 = ((WORD)dwSlot & 0x0700) >> 8;
+    WORD imm8 = (WORD)dwSlot & 0x00ff;
+
+    // f240 0c00 mov r12, #dwSlot
+    // f8df f000 ldr pc, [pc, #0]
+    // ???? ???? dcd TransparentProxyStub
+
+    *pCode++ = 0xf240 | (i << 10) | imm4;
+    *pCode++ = 0x0c00 | (imm3 << 12) | imm8;
+    *pCode++ = 0xf8df;
+    *pCode++ = 0xf000;
+    *((PCODE*)pCode) = GetTPStubEntryPoint();
+
+    _ASSERTE(CVirtualThunkMgr::IsThunkByASM((PCODE)startaddr));
+
+    return (PCODE)(startaddr + THUMB_CODE);
+}
+#endif // DACCESS_COMPILE
+
+BOOL CVirtualThunkMgr::IsThunkByASM(PCODE startaddr)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        PRECONDITION(startaddr != NULL);
+    }
+    CONTRACTL_END;
+
+#ifndef DACCESS_COMPILE
+    PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(startaddr));
+
+    return (((pInstr[0] & 0xf240) == 0xf240) &&
+            ((pInstr[1] & 0x0c00) == 0x0c00) &&
+            (pInstr[2] == 0xf8df) &&
+            (pInstr[3] == 0xf000) &&
+            (*(PCODE*)&pInstr[4] == CTPMethodTable::GetTPStubEntryPoint()));
+#else
+    DacNotImpl();
+    return FALSE;
+#endif
+}
+
+MethodDesc *CVirtualThunkMgr::GetMethodDescByASM(PCODE startaddr, MethodTable *pMT)
+{
+    CONTRACT (MethodDesc*)
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        PRECONDITION(startaddr != NULL);
+        PRECONDITION(CheckPointer(pMT));
+        POSTCONDITION(CheckPointer(RETVAL));
+    }
+    CONTRACT_END;
+
+    _ASSERTE(IsThunkByASM(startaddr));
+
+    PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(startaddr));
+
+    WORD i = (pInstr[0] & 0x0400) >> 10;
+    WORD imm4 = pInstr[0] & 0x000f;
+    WORD imm3 = (pInstr[1] & 0x7000) >> 12;
+    WORD imm8 = pInstr[1] & 0x00ff;
+
+    WORD wSlot = (imm4 << 12) | (i << 11) | (imm3 << 8) | imm8;
+
+    RETURN (pMT->GetMethodDescForSlot(wSlot));
+}
+
+#ifndef DACCESS_COMPILE
+
+BOOL CVirtualThunkMgr::DoTraceStub(PCODE stubStartAddress, TraceDestination *trace)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_ANY;
+        PRECONDITION(stubStartAddress != NULL);
+        PRECONDITION(CheckPointer(trace));
+    }
+    CONTRACTL_END;
+
+    TADDR pInstr = PCODEToPINSTR(stubStartAddress);
+
+    BOOL bIsStub = FALSE;
+
+    // Find a thunk whose code address matches the starting address
+    LPBYTE pThunk = FindThunk((LPBYTE)pInstr);
+    if (pThunk)
+    {
+        LONG destAddress = 0;
+
+        // The stub target address is stored as an absolute pointer 8 bytes into the thunk.
+        destAddress = *(LONG*)(pThunk + 8);
+
+        // We cannot tell where the stub will end up until OnCall is reached.
+        // So we tell the debugger to run until OnCall is reached and then
+        // come back and ask us again for the actual destination address of
+        // the call.
+
+        Stub *stub = Stub::RecoverStub((TADDR)destAddress);
+
+        trace->InitForFramePush(stub->GetPatchAddress());
+        bIsStub = TRUE;
+    }
+
+    return bIsStub;
+}
+
+extern "C" UINT_PTR __stdcall CRemotingServices__CheckForContextMatch(Object* pStubData)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_COOPERATIVE; // due to the Object parameter
+        SO_TOLERANT;
+        PRECONDITION(CheckPointer(pStubData));
+    }
+    CONTRACTL_END;
+
+    UINT_PTR contextID = *(UINT_PTR*)pStubData->UnBox();
+    UINT_PTR contextCur = (UINT_PTR)GetThread()->m_Context;
+    return (contextCur != contextID); // chosen to match the x86 convention
+}
+
+// Return true if the current context matches that of the given transparent proxy.
+BOOL CTPMethodTable__GenericCheckForContextMatch(Object* orTP)
+{
+    CONTRACTL
+    {
+        NOTHROW;
+        GC_NOTRIGGER;
+        MODE_COOPERATIVE; // due to the Object parameter
+        SO_TOLERANT;
+    }
+    CONTRACTL_END;
+
+    Object *StubData = OBJECTREFToObject(((TransparentProxyObject*)orTP)->GetStubData());
+    CTPMethodTable::CheckContextCrossingProc *pfnCheckContextCrossing =
+        (CTPMethodTable::CheckContextCrossingProc*)(((TransparentProxyObject*)orTP)->GetStub());
+    return pfnCheckContextCrossing(StubData) == 0;
+}
+
+#endif // !DACCESS_COMPILE
+
+#endif // FEATURE_REMOTING && !CROSSGEN_COMPILE
+
+#ifdef FEATURE_COMINTEROP
+void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target)
+{
+    WRAPPER_NO_CONTRACT;
+
+    // mov r12, pc
+    // ldr pc, [pc, #4]
+    // (2 bytes of padding)
+    // dcd target
+    WORD rgCode[] = {
+        0x46fc,
+        0xf8df, 0xf004
+    };
+
+    BYTE *pBuffer = (BYTE*)pCOMMethod - COMMETHOD_CALL_PRESTUB_SIZE;
+
+    memcpy(pBuffer, rgCode, sizeof(rgCode));
+    *((PCODE*)(pBuffer + sizeof(rgCode) + 2)) = target;
+
+    // Ensure that the updated instructions actually get written out
+    ClrFlushInstructionCache(pBuffer, COMMETHOD_CALL_PRESTUB_SIZE);
+
+    _ASSERTE(IS_ALIGNED(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET, sizeof(void*)) &&
+             *((PCODE*)(pBuffer + COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET)) == target);
+}
+#endif // FEATURE_COMINTEROP
+
+#ifndef DACCESS_COMPILE
+
+#ifndef CROSSGEN_COMPILE
+
+DWORD GetLogicalCpuCount()
+{
+    // Just use the OS to return this information (the APIs used exist on all
+    // versions of Windows that support ARM).
+    return GetLogicalCpuCountFromOS();
+}
+
+#ifdef FEATURE_READYTORUN
+
+//
+// Allocation of dynamic helpers
+//
+
+#define DYNAMIC_HELPER_ALIGNMENT sizeof(TADDR)
+
+#define BEGIN_DYNAMIC_HELPER_EMIT(size) \
+    SIZE_T cb = size; \
+    SIZE_T cbAligned = ALIGN_UP(cb, DYNAMIC_HELPER_ALIGNMENT); \
+    BYTE * pStart = (BYTE *)(void *)pAllocator->GetDynamicHelpersHeap()->AllocAlignedMem(cbAligned, DYNAMIC_HELPER_ALIGNMENT); \
+    BYTE * p = pStart;
+
+// Fill the alignment padding with 0xdefe (a Thumb debug-break encoding), then
+// flush the instruction cache so the new code is visible.
+#define END_DYNAMIC_HELPER_EMIT() \
+    _ASSERTE(pStart + cb == p); \
+    while (p < pStart + cbAligned) { *(WORD *)p = 0xdefe; p += 2; } \
+    ClrFlushInstructionCache(pStart, cbAligned); \
+    return (PCODE)((TADDR)pStart | THUMB_CODE)
+
+static void MovRegImm(BYTE* p, int reg, TADDR imm)
+{
+    LIMITED_METHOD_CONTRACT;
+    // Lay down a movw/movt template targeting the given register...
+    *(WORD *)(p + 0) = 0xF240;
+    *(WORD *)(p + 2) = (UINT16)(reg << 8);
+    *(WORD *)(p + 4) = 0xF2C0;
+    *(WORD *)(p + 6) = (UINT16)(reg << 8);
+    // ...then patch the 32-bit immediate into both instructions.
+    PutThumb2Mov32((UINT16 *)p, imm);
+}
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+    STANDARD_VM_CONTRACT;
+
+    BEGIN_DYNAMIC_HELPER_EMIT(18);
+
+    // mov r0, arg
+    MovRegImm(p, 0, arg);
+    p += 8;
+
+    // mov r12, target
+    MovRegImm(p, 12, target);
+    p += 8;
+
+    // bx r12
+    *(WORD *)p = 0x4760;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithArg(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(18);
+
+    // mov r1, arg
+    MovRegImm(p, 1, arg);
+    p += 8;
+
+    // mov r12, target
+    MovRegImm(p, 12, target);
+    p += 8;
+
+    // bx r12
+    *(WORD *)p = 0x4760;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelper(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(26);
+
+    // mov r0, arg
+    MovRegImm(p, 0, arg);
+    p += 8;
+
+    // mov r1, arg2
+    MovRegImm(p, 1, arg2);
+    p += 8;
+
+    // mov r12, target
+    MovRegImm(p, 12, target);
+    p += 8;
+
+    // bx r12
+    *(WORD *)p = 0x4760;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperArgMove(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(20);
+
+    // mov r1, r0
+    *(WORD *)p = 0x4601;
+    p += 2;
+
+    // mov r0, arg
+    MovRegImm(p, 0, arg);
+    p += 8;
+
+    // mov r12, target
+    MovRegImm(p, 12, target);
+    p += 8;
+
+    // bx r12
+    *(WORD *)p = 0x4760;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturn(LoaderAllocator * pAllocator)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(2);
+
+    *(WORD *)p = 0x4770; // bx lr
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnConst(LoaderAllocator * pAllocator, TADDR arg)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(10);
+
+    // mov r0, arg
+    MovRegImm(p, 0, arg);
+    p += 8;
+
+    // bx lr
+    *(WORD *)p = 0x4770;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateReturnIndirConst(LoaderAllocator * pAllocator, TADDR arg, INT8 offset)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT((offset != 0) ?
16 : 12);
+
+    // mov r0, arg
+    MovRegImm(p, 0, arg);
+    p += 8;
+
+    // ldr r0, [r0]
+    *(WORD *)p = 0x6800;
+    p += 2;
+
+    if (offset != 0)
+    {
+        // add r0, r0, <offset>
+        *(WORD *)(p + 0) = 0xF100;
+        *(WORD *)(p + 2) = offset;
+        p += 4;
+    }
+
+    // bx lr
+    *(WORD *)p = 0x4770;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, PCODE target)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(18);
+
+    // mov r2, arg
+    MovRegImm(p, 2, arg);
+    p += 8;
+
+    // mov r12, target
+    MovRegImm(p, 12, target);
+    p += 8;
+
+    // bx r12
+    *(WORD *)p = 0x4760;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateHelperWithTwoArgs(LoaderAllocator * pAllocator, TADDR arg, TADDR arg2, PCODE target)
+{
+    BEGIN_DYNAMIC_HELPER_EMIT(26);
+
+    // mov r2, arg
+    MovRegImm(p, 2, arg);
+    p += 8;
+
+    // mov r3, arg2
+    MovRegImm(p, 3, arg2);
+    p += 8;
+
+    // mov r12, target
+    MovRegImm(p, 12, target);
+    p += 8;
+
+    // bx r12
+    *(WORD *)p = 0x4760;
+    p += 2;
+
+    END_DYNAMIC_HELPER_EMIT();
+}
+
+PCODE DynamicHelpers::CreateDictionaryLookupHelper(LoaderAllocator * pAllocator, CORINFO_RUNTIME_LOOKUP * pLookup, DWORD dictionaryIndexAndSlot, Module * pModule)
+{
+    STANDARD_VM_CONTRACT;
+
+    // TODO (NYI)
+    ThrowHR(E_NOTIMPL);
+}
+#endif // FEATURE_READYTORUN
+
+#endif // CROSSGEN_COMPILE
+
+#endif // !DACCESS_COMPILE