Diffstat (limited to 'src/vm/arm/cgencpu.h')
-rw-r--r-- src/vm/arm/cgencpu.h | 1338
1 file changed, 1338 insertions, 0 deletions
diff --git a/src/vm/arm/cgencpu.h b/src/vm/arm/cgencpu.h
new file mode 100644
index 0000000000..936fdabafb
--- /dev/null
+++ b/src/vm/arm/cgencpu.h
@@ -0,0 +1,1338 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+
+#ifndef _TARGET_ARM_
+#error Should only include "cgencpu.h" for ARM builds
+#endif
+
+#ifndef __cgencpu_h__
+#define __cgencpu_h__
+
+#include "utilcode.h"
+#include "tls.h"
+
+// preferred alignment for data
+#define DATA_ALIGNMENT 4
+
+#define DISPATCH_STUB_FIRST_WORD 0xf8d0
+#define RESOLVE_STUB_FIRST_WORD 0xf8d0
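+// (0xf8d0 is the first halfword of the 32-bit Thumb-2 "ldr.w" instruction with
+// which both the dispatch and resolve stubs begin.)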
+
+class MethodDesc;
+class FramedMethodFrame;
+class Module;
+struct DeclActionInfo;
+class ComCallMethodDesc;
+class BaseDomain;
+class ZapNode;
+struct ArgLocDesc;
+
+#define USE_REDIRECT_FOR_GCSTRESS
+
+// CPU-dependent functions
+Stub * GenerateInitPInvokeFrameHelper();
+
+EXTERN_C void checkStack(void);
+
+#ifdef CROSSGEN_COMPILE
+#define GetEEFuncEntryPoint(pfn) 0x1001
+#else
+#define GetEEFuncEntryPoint(pfn) GFN_TADDR(pfn)
+#endif
+
+//**********************************************************************
+
+#define COMMETHOD_PREPAD 12 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
+#ifdef FEATURE_COMINTEROP
+#define COMMETHOD_CALL_PRESTUB_SIZE 12
+#define COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET 8 // the offset of the call target address inside the prestub
+#endif // FEATURE_COMINTEROP
+
+#define STACK_ALIGN_SIZE 4
+
+#define JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a jump instruction
+#define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a back to back jump instruction
+
+//#define HAS_COMPACT_ENTRYPOINTS 1
+
+#define HAS_NDIRECT_IMPORT_PRECODE 1
+
+#define USE_INDIRECT_CODEHEADER
+
+#ifdef FEATURE_REMOTING
+#define HAS_REMOTING_PRECODE 1
+#endif
+
+EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal);
+EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
+
+#define HAS_FIXUP_PRECODE 1
+#define HAS_FIXUP_PRECODE_CHUNKS 1
+
+// ThisPtrRetBufPrecode is necessary for closed delegates over static methods with return buffer
+#define HAS_THISPTR_RETBUF_PRECODE 1
+
+#define CODE_SIZE_ALIGN 4
+#define CACHE_LINE_SIZE 32 // Common ARMv7 cores use a 32-byte L1 cache line
+#define LOG2SLOT LOG2_PTRSIZE
+
+#define ENREGISTERED_RETURNTYPE_MAXSIZE 32 // bytes (maximum HFA size is 4 doubles)
+#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 4 // bytes
+
+#define CALLDESCR_ARGREGS 1 // CallDescrWorker has ArgumentRegister parameter
+#define CALLDESCR_FPARGREGS 1 // CallDescrWorker has FloatArgumentRegisters parameter
+
+// Max size of optimized TLS helpers
+#define TLS_GETTER_MAX_SIZE 0x10
+
+// Given a return address retrieved during stackwalk,
+// this is the offset by which it should be decremented to arrive at the callsite.
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 2
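+// (The smallest Thumb instruction is 2 bytes, so subtracting 2 from the return
+// address is guaranteed to land inside the call instruction that precedes it.)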
+
+//=======================================================================
+// IMPORTANT: This value is used to figure out how much to allocate
+// for a fixed array of FieldMarshalers. That means it must be at least
+// as large as the largest FieldMarshaler subclass. This requirement
+// is guarded by an assert.
+//=======================================================================
+#define MAXFIELDMARSHALERSIZE 24
+
+//**********************************************************************
+// Parameter size
+//**********************************************************************
+
+typedef INT32 StackElemType;
+#define STACK_ELEM_SIZE sizeof(StackElemType)
+
+// !! This expression assumes STACK_ELEM_SIZE is a power of 2.
+#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1)))
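+// For example, with STACK_ELEM_SIZE == 4: StackElemSize(1) == 4,
+// StackElemSize(5) == 8 and StackElemSize(8) == 8.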
+
+//**********************************************************************
+// Frames
+//**********************************************************************
+
+//--------------------------------------------------------------------
+// This represents the callee saved (non-volatile) registers saved as
+// of a FramedMethodFrame.
+//--------------------------------------------------------------------
+typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
+struct CalleeSavedRegisters {
+ INT32 r4, r5, r6, r7, r8, r9, r10;
+ INT32 r11; // frame pointer
+ INT32 r14; // link register
+};
+
+//--------------------------------------------------------------------
+// This represents the arguments that are stored in volatile registers.
+// This should not overlap the CalleeSavedRegisters since those are already
+// saved separately and it would be wasteful to save the same register twice.
+// If we do use a non-volatile register as an argument, then the ArgIterator
+// will probably have to communicate this back to the PromoteCallerStack
+// routine to avoid a double promotion.
+//--------------------------------------------------------------------
+typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters;
+struct ArgumentRegisters {
+ INT32 r[4]; // r0, r1, r2, r3
+};
+#define NUM_ARGUMENT_REGISTERS 4
+
+//--------------------------------------------------------------------
+// This represents the floating point argument registers which are saved
+// as part of the NegInfo for a FramedMethodFrame. Note that these
+// might not be saved by all stubs: typically only those that call into
+// C++ helpers will need to preserve the values in these volatile
+// registers.
+//--------------------------------------------------------------------
+typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
+struct FloatArgumentRegisters {
+ union
+ {
+ float s[16]; // s0-s15
+ double d[8]; // d0-d7
+ };
+};
+
+// forward decl
+struct REGDISPLAY;
+typedef REGDISPLAY *PREGDISPLAY;
+
+// Sufficient context for Try/Catch restoration.
+struct EHContext {
+ INT32 r[16]; // note: includes r15(pc)
+ void Setup(PCODE resumePC, PREGDISPLAY regs);
+
+ inline TADDR GetSP() {
+ LIMITED_METHOD_CONTRACT;
+ return (TADDR)r[13];
+ }
+ inline void SetSP(LPVOID esp) {
+ LIMITED_METHOD_CONTRACT;
+ r[13] = (INT32)(size_t)esp;
+ }
+
+ inline LPVOID GetFP() {
+ LIMITED_METHOD_CONTRACT;
+ return (LPVOID)(UINT_PTR)r[11];
+ }
+
+ inline void SetArg(LPVOID arg) {
+ LIMITED_METHOD_CONTRACT;
+ r[0] = (INT32)(size_t)arg;
+ }
+};
+
+#define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters)
+
+//**********************************************************************
+// Exception handling
+//**********************************************************************
+
+inline PCODE GetIP(const T_CONTEXT * context) {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return PCODE(context->Pc);
+}
+
+inline void SetIP(T_CONTEXT *context, PCODE eip) {
+ LIMITED_METHOD_DAC_CONTRACT;
+ context->Pc = DWORD(eip);
+}
+
+inline TADDR GetSP(const T_CONTEXT * context) {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return TADDR(context->Sp);
+}
+
+inline PCODE GetLR(const T_CONTEXT * context) {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return PCODE(context->Lr);
+}
+
+extern "C" LPVOID __stdcall GetCurrentSP();
+
+inline void SetSP(T_CONTEXT *context, TADDR esp) {
+ LIMITED_METHOD_DAC_CONTRACT;
+ context->Sp = DWORD(esp);
+}
+
+inline void SetFP(T_CONTEXT *context, TADDR ebp) {
+ LIMITED_METHOD_DAC_CONTRACT;
+ context->R11 = DWORD(ebp);
+}
+
+inline TADDR GetFP(const T_CONTEXT * context)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+ return (TADDR)(context->R11);
+}
+
+inline void ClearITState(T_CONTEXT *context) {
+ LIMITED_METHOD_DAC_CONTRACT;
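+    // Clear the ITSTATE bits of the CPSR, i.e. bits [26:25] and [15:10]
+    // (hence the 0xf9ff03ff mask).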
+ context->Cpsr = context->Cpsr & 0xf9ff03ff;
+}
+
+#ifdef FEATURE_COMINTEROP
+void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target);
+#endif // FEATURE_COMINTEROP
+
+//------------------------------------------------------------------------
+inline void emitJump(LPBYTE pBuffer, LPVOID target)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ // The PC-relative load we emit below requires 4-byte alignment for the offset to be calculated correctly.
+ _ASSERTE(((UINT_PTR)pBuffer & 3) == 0);
+
+ DWORD * pCode = (DWORD *)pBuffer;
+
+ // ldr pc, [pc, #0]
+ pCode[0] = 0xf000f8df;
+ pCode[1] = (DWORD)target;
+}
+
+//------------------------------------------------------------------------
+// Given the same pBuffer that was used by emitJump this method
+// decodes the instructions and returns the jump target
+inline PCODE decodeJump(PCODE pCode)
+{
+ LIMITED_METHOD_CONTRACT;
+
+ TADDR pInstr = PCODEToPINSTR(pCode);
+
+ return *dac_cast<PTR_PCODE>(pInstr + sizeof(DWORD));
+}
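+
+// Illustrative round trip (hypothetical 'buffer' and 'someTarget', shown only
+// to document the layout emitted above):
+//
+//   DWORD buffer[2];                       // must be 4-byte aligned
+//   emitJump((LPBYTE)buffer, someTarget);  // buffer[0] = "ldr pc, [pc, #0]"
+//                                          // buffer[1] = (DWORD)someTarget
+//   decodeJump((PCODE)buffer);             // reads buffer[1]; yields someTarget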
+
+//
+// On IA64, back-to-back jumps should be separated by a nop bundle to get
+// the best performance from the hardware's branch prediction logic.
+// For all other platforms, back-to-back jumps don't require anything special.
+// That is why we have these two wrapper functions that call emitJump and decodeJump.
+//
+
+//------------------------------------------------------------------------
+inline BOOL isJump(PCODE pCode)
+{
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ TADDR pInstr = PCODEToPINSTR(pCode);
+
+ return *dac_cast<PTR_DWORD>(pInstr) == 0xf000f8df;
+}
+
+//------------------------------------------------------------------------
+inline BOOL isBackToBackJump(PCODE pBuffer)
+{
+ WRAPPER_NO_CONTRACT;
+ SUPPORTS_DAC;
+ return isJump(pBuffer);
+}
+
+//------------------------------------------------------------------------
+inline void emitBackToBackJump(LPBYTE pBuffer, LPVOID target)
+{
+ WRAPPER_NO_CONTRACT;
+ emitJump(pBuffer, target);
+}
+
+//------------------------------------------------------------------------
+inline PCODE decodeBackToBackJump(PCODE pBuffer)
+{
+ WRAPPER_NO_CONTRACT;
+ return decodeJump(pBuffer);
+}
+
+//----------------------------------------------------------------------
+#include "stublink.h"
+struct ArrayOpScript;
+
+#define THUMB_CODE 1
+
+inline BOOL IsThumbCode(PCODE pCode)
+{
+ return (pCode & THUMB_CODE) != 0;
+}
+
+struct ThumbReg
+{
+ int reg;
+ ThumbReg(int reg):reg(reg)
+ {
+ _ASSERTE(0 <= reg && reg < 16);
+ }
+
+ operator int ()
+ {
+ return reg;
+ }
+
+ int operator == (ThumbReg other)
+ {
+ return reg == other.reg;
+ }
+
+ int operator != (ThumbReg other)
+ {
+ return reg != other.reg;
+ }
+
+ WORD Mask() const
+ {
+ return 1 << reg;
+ }
+
+};
+
+struct ThumbCond
+{
+ int cond;
+ ThumbCond(int cond):cond(cond)
+ {
+ _ASSERTE(0 <= cond && cond < 16);
+ }
+};
+
+struct ThumbVFPSingleReg
+{
+ int reg;
+ ThumbVFPSingleReg(int reg):reg(reg)
+ {
+        _ASSERTE(0 <= reg && reg < 32);
+ }
+
+ operator int ()
+ {
+ return reg;
+ }
+
+ int operator == (ThumbVFPSingleReg other)
+ {
+ return reg == other.reg;
+ }
+
+ int operator != (ThumbVFPSingleReg other)
+ {
+ return reg != other.reg;
+ }
+
+ WORD Mask() const
+ {
+ return 1 << reg;
+ }
+
+};
+
+struct ThumbVFPDoubleReg
+{
+ int reg;
+ ThumbVFPDoubleReg(int reg):reg(reg)
+ {
+        _ASSERTE(0 <= reg && reg < 32);
+ }
+
+ operator int ()
+ {
+ return reg;
+ }
+
+ int operator == (ThumbVFPDoubleReg other)
+ {
+ return reg == other.reg;
+ }
+
+ int operator != (ThumbVFPDoubleReg other)
+ {
+ return reg != other.reg;
+ }
+
+ WORD Mask() const
+ {
+ return 1 << reg;
+ }
+};
+
+const ThumbReg thumbRegFp = ThumbReg(11);
+const ThumbReg thumbRegSp = ThumbReg(13);
+const ThumbReg thumbRegLr = ThumbReg(14);
+const ThumbReg thumbRegPc = ThumbReg(15);
+
+const ThumbCond thumbCondEq = ThumbCond(0);
+const ThumbCond thumbCondNe = ThumbCond(1);
+const ThumbCond thumbCondCs = ThumbCond(2);
+const ThumbCond thumbCondCc = ThumbCond(3);
+const ThumbCond thumbCondMi = ThumbCond(4);
+const ThumbCond thumbCondPl = ThumbCond(5);
+const ThumbCond thumbCondVs = ThumbCond(6);
+const ThumbCond thumbCondVc = ThumbCond(7);
+const ThumbCond thumbCondHi = ThumbCond(8);
+const ThumbCond thumbCondLs = ThumbCond(9);
+const ThumbCond thumbCondGe = ThumbCond(10);
+const ThumbCond thumbCondLt = ThumbCond(11);
+const ThumbCond thumbCondGt = ThumbCond(12);
+const ThumbCond thumbCondLe = ThumbCond(13);
+const ThumbCond thumbCondAl = ThumbCond(14);
+
+class StubLinkerCPU : public StubLinker
+{
+public:
+ static void Init();
+
+ void ThumbEmitProlog(UINT cCalleeSavedRegs, UINT cbStackFrame, BOOL fPushArgRegs)
+ {
+ _ASSERTE(!m_fProlog);
+
+ // Record the parameters of this prolog so that we can generate a matching epilog and unwind info.
+ DescribeProlog(cCalleeSavedRegs, cbStackFrame, fPushArgRegs);
+
+ // Trivial prologs (which is all that we support initially) consist of between one and three
+ // instructions.
+
+ // 1) Push argument registers. This is all or nothing (if we push, we push R0-R3).
+ if (fPushArgRegs)
+ {
+ // push {r0-r3}
+ ThumbEmitPush(ThumbReg(0).Mask() | ThumbReg(1).Mask() | ThumbReg(2).Mask() | ThumbReg(3).Mask());
+ }
+
+        // 2) Push callee saved registers. We always start pushing at R4, and only save consecutive registers
+ // from there (max is R11). Additionally we always assume LR is saved for these types of prolog.
+ // push {r4-rX,lr}
+ WORD wRegisters = thumbRegLr.Mask();
+ for (unsigned int i = 4; i < (4 + cCalleeSavedRegs); i++)
+ wRegisters |= ThumbReg(i).Mask();
+ ThumbEmitPush(wRegisters);
+
+ // 3) Reserve space on the stack for the rest of the frame.
+ if (cbStackFrame)
+ {
+ // sub sp, #cbStackFrame
+ ThumbEmitSubSp(cbStackFrame);
+ }
+ }
+
+ void ThumbEmitEpilog()
+ {
+ // Generate an epilog matching a prolog generated by ThumbEmitProlog.
+ _ASSERTE(m_fProlog);
+
+ // If additional stack space for a frame was allocated remove it now.
+ if (m_cbStackFrame)
+ {
+ // add sp, #m_cbStackFrame
+ ThumbEmitAddSp(m_cbStackFrame);
+ }
+
+        // Pop callee saved registers (we always have at least LR). If no argument registers were saved then
+        // we can pop the saved return address straight into PC and we're done. Otherwise it is popped into LR
+        // so that the argument register stack space can be reclaimed first.
+ // pop {r4-rX,lr|pc}
+ WORD wRegisters = m_fPushArgRegs ? thumbRegLr.Mask() : thumbRegPc.Mask();
+ for (unsigned int i = 4; i < (4 + m_cCalleeSavedRegs); i++)
+ wRegisters |= ThumbReg(i).Mask();
+ ThumbEmitPop(wRegisters);
+
+ if (!m_fPushArgRegs)
+ return;
+
+ // We pushed the argument registers. These aren't restored, but we need to reclaim the stack space.
+ // add sp, #16
+ ThumbEmitAddSp(16);
+
+ // Return. The return address has been restored into LR at this point.
+ // bx lr
+ ThumbEmitJumpRegister(thumbRegLr);
+ }
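+
+    // Illustrative usage (hypothetical stub linker 'pSL'; not part of the
+    // original header): a prolog with three callee saved registers, a 0x20
+    // byte frame and saved argument registers pairs with its epilog like so:
+    //
+    //   pSL->ThumbEmitProlog(3, 0x20, TRUE); // push {r0-r3}; push {r4-r6,lr}; sub sp, #0x20
+    //   ...stub body...
+    //   pSL->ThumbEmitEpilog();              // add sp, #0x20; pop {r4-r6,lr}; add sp, #16; bx lr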
+
+ void ThumbEmitGetThread(TLSACCESSMODE mode, ThumbReg dest);
+
+ void ThumbEmitNop()
+ {
+ // nop
+ Emit16(0xbf00);
+ }
+
+ void ThumbEmitBreakpoint()
+ {
+ // Permanently undefined instruction #0xfe (see ARMv7-A A6.2.6). The debugger seems to accept this as
+ // a reasonable breakpoint substitute (it's what DebugBreak uses). Bkpt #0, on the other hand, always
+ // seems to flow directly to the kernel debugger (even if we ignore it there it doesn't seem to be
+ // picked up by the user mode debugger).
+ Emit16(0xdefe);
+ }
+
+ void ThumbEmitMovConstant(ThumbReg dest, int constant)
+ {
+        _ASSERTE(dest != thumbRegPc);
+
+        // Emit a single 16-bit instruction when dest is a low register (r0-r7) and 0 <= constant < 256.
+ if(dest <= 7 && constant < 256 && constant >= 0)
+ {
+ Emit16((WORD)(0x2000 | dest<<8 | (WORD)constant));
+ }
+ else // emit 4 byte instructions
+ {
+ WORD wConstantLow = (WORD)(constant & 0xffff);
+ WORD wConstantHigh = (WORD)(constant >> 16);
+
+ // movw regDest, #wConstantLow
+ Emit16((WORD)(0xf240 | (wConstantLow >> 12) | ((wConstantLow & 0x0800) ? 0x0400 : 0x0000)));
+ Emit16((WORD)((dest << 8) | (((wConstantLow >> 8) & 0x0007) << 12) | (wConstantLow & 0x00ff)));
+
+ if (wConstantHigh)
+ {
+                // movt regDest, #wConstantHigh
+ Emit16((WORD)(0xf2c0 | (wConstantHigh >> 12) | ((wConstantHigh & 0x0800) ? 0x0400 : 0x0000)));
+ Emit16((WORD)((dest << 8) | (((wConstantHigh >> 8) & 0x0007) << 12) | (wConstantHigh & 0x00ff)));
+ }
+ }
+ }
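+
+    // For example, ThumbEmitMovConstant(ThumbReg(0), 5) emits the single
+    // 16-bit instruction "movs r0, #5" (0x2005); constants that don't fit
+    // the narrow form are built with a movw/movt pair as above.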
+
+ void ThumbEmitLoadRegIndirect(ThumbReg dest, ThumbReg source, int offset)
+ {
+ _ASSERTE((offset >= 0) && (offset <= 4095));
+
+ // ldr regDest, [regSource + #offset]
+ if ((dest < 8) && (source < 8) && ((offset & 0x3) == 0) && (offset < 125))
+ {
+ // Encoding T1
+ Emit16((WORD)(0x6800 | ((offset >> 2) << 6) | (source << 3) | dest));
+ }
+ else
+ {
+ // Encoding T3
+ Emit16((WORD)(0xf8d0 | source));
+ Emit16((WORD)((dest << 12) | offset));
+ }
+ }
+
+ void ThumbEmitLoadIndirectPostIncrement(ThumbReg dest, ThumbReg source, int offset)
+ {
+ _ASSERTE((offset >= 0) && (offset <= 255));
+
+ // ldr regDest, [regSource], #offset
+ Emit16((WORD)(0xf850 | source));
+ Emit16((WORD)(0x0b00 | (dest << 12) | offset));
+ }
+
+ void ThumbEmitStoreRegIndirect(ThumbReg source, ThumbReg dest, int offset)
+ {
+ _ASSERTE((offset >= -255) && (offset <= 4095));
+
+ // str regSource, [regDest + #offset]
+ if (offset < 0)
+ {
+ Emit16((WORD)(0xf840 | dest));
+ Emit16((WORD)(0x0C00 | (source << 12) | (UINT8)(-offset)));
+ }
+ else
+ if ((dest < 8) && (source < 8) && ((offset & 0x3) == 0) && (offset < 125))
+ {
+ // Encoding T1
+ Emit16((WORD)(0x6000 | ((offset >> 2) << 6) | (dest << 3) | source));
+ }
+ else
+ {
+ // Encoding T3
+ Emit16((WORD)(0xf8c0 | dest));
+ Emit16((WORD)((source << 12) | offset));
+ }
+ }
+
+ void ThumbEmitStoreIndirectPostIncrement(ThumbReg source, ThumbReg dest, int offset)
+ {
+ _ASSERTE((offset >= 0) && (offset <= 255));
+
+ // str regSource, [regDest], #offset
+ Emit16((WORD)(0xf840 | dest));
+ Emit16((WORD)(0x0b00 | (source << 12) | offset));
+ }
+
+ void ThumbEmitLoadOffsetScaledReg(ThumbReg dest, ThumbReg base, ThumbReg offset, int shift)
+ {
+ _ASSERTE(shift >=0 && shift <=3);
+
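+        // ldr regDest, [regBase, regOffset, lsl #shift]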
+ Emit16((WORD)(0xf850 | base));
+ Emit16((WORD)((dest << 12) | (shift << 4) | offset));
+ }
+
+ void ThumbEmitCallRegister(ThumbReg target)
+ {
+ // blx regTarget
+ Emit16((WORD)(0x4780 | (target << 3)));
+ }
+
+ void ThumbEmitJumpRegister(ThumbReg target)
+ {
+ // bx regTarget
+ Emit16((WORD)(0x4700 | (target << 3)));
+ }
+
+ void ThumbEmitMovRegReg(ThumbReg dest, ThumbReg source)
+ {
+ // mov regDest, regSource
+ Emit16((WORD)(0x4600 | ((dest > 7) ? 0x0080 : 0x0000) | (source << 3) | (dest & 0x0007)));
+ }
+
+    // Assumes SP is only ever subtracted in a prolog.
+ void ThumbEmitSubSp(int value)
+ {
+ _ASSERTE(value >= 0);
+ _ASSERTE((value & 0x3) == 0);
+
+ if(value < 512)
+ {
+ // encoding T1
+ // sub sp, sp, #(value >> 2)
+ Emit16((WORD)(0xb080 | (value >> 2)));
+ }
+ else if(value < 4096)
+ {
+            // Using 32-bit encoding T4 (subw sp, sp, #value)
+ Emit16((WORD)(0xf2ad| ((value & 0x0800) >> 1)));
+ Emit16((WORD)(0x0d00| ((value & 0x0700) << 4) | (value & 0x00ff)));
+ }
+ else
+ {
+            // For values >= 4K (the page size) we must probe the stack so the guard
+            // page is committed before SP is moved past it.
+
+#ifndef CROSSGEN_COMPILE
+ // mov r4, value
+ ThumbEmitMovConstant(ThumbReg(4), value);
+ // mov r12, checkStack
+ ThumbEmitMovConstant(ThumbReg(12), (int)checkStack);
+            // blx r12
+ ThumbEmitCallRegister(ThumbReg(12));
+#endif
+
+ // sub sp,sp,r4
+ Emit16((WORD)0xebad);
+ Emit16((WORD)0x0d04);
+ }
+ }
+
+ void ThumbEmitAddSp(int value)
+ {
+ _ASSERTE(value >= 0);
+ _ASSERTE((value & 0x3) == 0);
+
+ if(value < 512)
+ {
+ // encoding T2
+ // add sp, sp, #(value >> 2)
+ Emit16((WORD)(0xb000 | (value >> 2)));
+ }
+ else if(value < 4096)
+ {
+ // Using 32-bit encoding T4
+ Emit16((WORD)(0xf20d| ((value & 0x0800) >> 1)));
+ Emit16((WORD)(0x0d00| ((value & 0x0700) << 4) | (value & 0x00ff)));
+ }
+ else
+ {
+            // Must use a temporary register for values >= 4096
+ ThumbEmitMovConstant(ThumbReg(12), value);
+ // add sp,sp,r12
+ Emit16((WORD)0x44e5);
+ }
+ }
+
+ void ThumbEmitAddReg(ThumbReg dest, ThumbReg source)
+ {
+
+ _ASSERTE(dest != source);
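+        // add regDest, regSource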
+ Emit16((WORD)(0x4400 | ((dest & 0x8)<<4) | (source<<3) | (dest & 0x7)));
+ }
+
+ void ThumbEmitAdd(ThumbReg dest, ThumbReg source, unsigned int value)
+ {
+
+ if(value<4096)
+ {
+ // addw dest, source, #value
+ unsigned int i = (value & 0x800) >> 11;
+ unsigned int imm3 = (value & 0x700) >> 8;
+ unsigned int imm8 = value & 0xff;
+ Emit16((WORD)(0xf200 | (i << 10) | source));
+ Emit16((WORD)((imm3 << 12) | (dest << 8) | imm8));
+ }
+ else
+ {
+            // If the immediate is 4096 or more, only ADD (register) will work:
+            // move the immediate into the dest reg and use ADD (register).
+            // This does not work if dest is the same as source.
+ _ASSERTE(dest != source);
+ ThumbEmitMovConstant(dest, value);
+ ThumbEmitAddReg(dest, source);
+ }
+ }
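+
+    // For example, ThumbEmitAdd(ThumbReg(0), ThumbReg(1), 100) emits
+    // "addw r0, r1, #100" as the halfword pair 0xf201 0x0064.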
+
+ void ThumbEmitSub(ThumbReg dest, ThumbReg source, unsigned int value)
+ {
+ _ASSERTE(value < 4096);
+
+ // subw dest, source, #value
+ unsigned int i = (value & 0x800) >> 11;
+ unsigned int imm3 = (value & 0x700) >> 8;
+ unsigned int imm8 = value & 0xff;
+ Emit16((WORD)(0xf2a0 | (i << 10) | source));
+ Emit16((WORD)((imm3 << 12) | (dest << 8) | imm8));
+ }
+
+ void ThumbEmitCmpReg(ThumbReg reg1, ThumbReg reg2)
+ {
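+        // cmp reg1, reg2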
+ if(reg1 < 8 && reg2 <8)
+ {
+ Emit16((WORD)(0x4280 | reg2 << 3 | reg1));
+ }
+ else
+ {
+ _ASSERTE(reg1 != ThumbReg(15) && reg2 != ThumbReg(15));
+ Emit16((WORD)(0x4500 | reg2 << 3 | (reg1 & 0x7) | (reg1 & 0x8 ? 0x80 : 0x0)));
+ }
+ }
+
+ void ThumbEmitIncrement(ThumbReg dest, unsigned int value)
+ {
+ while (value)
+ {
+ if (value >= 4095)
+ {
+ // addw <dest>, <dest>, #4095
+ ThumbEmitAdd(dest, dest, 4095);
+ value -= 4095;
+ }
+ else if (value <= 255)
+ {
+ // add <dest>, #value
+ Emit16((WORD)(0x3000 | (dest << 8) | value));
+ break;
+ }
+ else
+ {
+ // addw <dest>, <dest>, #value
+ ThumbEmitAdd(dest, dest, value);
+ break;
+ }
+ }
+ }
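+
+    // For example, ThumbEmitIncrement(dest, 5000) emits
+    // "addw dest, dest, #4095" followed by "addw dest, dest, #905".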
+
+ void ThumbEmitPush(WORD registers)
+ {
+ _ASSERTE(registers != 0);
+ _ASSERTE((registers & 0xa000) == 0); // Pushing SP or PC undefined
+
+ // push {registers}
+ if (CountBits(registers) == 1)
+ {
+ // Encoding T3 (exactly one register, high or low)
+ WORD reg = 15;
+ while ((registers & (WORD)(1 << reg)) == 0)
+ {
+ reg--;
+ }
+ Emit16(0xf84d);
+ Emit16(0x0d04 | (reg << 12));
+ }
+ else if ((registers & 0xbf00) == 0)
+ {
+ // Encoding T1 (low registers plus maybe LR)
+ Emit16(0xb400 | (registers & thumbRegLr.Mask() ? 0x0100: 0x0000) | (registers & 0x00ff));
+ }
+ else
+ {
+ // Encoding T2 (two or more registers, high or low)
+ Emit16(0xe92d);
+ Emit16(registers);
+ }
+ }
+
+ void ThumbEmitLoadStoreMultiple(ThumbReg base, bool load, WORD registers)
+ {
+ _ASSERTE(CountBits(registers) > 1);
+ _ASSERTE((registers & 0xFF00) == 0); // This only supports the small encoding
+ _ASSERTE(base < 8); // This only supports the small encoding
+ _ASSERTE((base.Mask() & registers) == 0); // This only supports the small encoding
+
+ // (LDM|STM) base, {registers}
+ WORD flag = load ? 0x0800 : 0;
+ Emit16(0xc000 | flag | ((base & 7) << 8) | (registers & 0xFF));
+ }
+
+ void ThumbEmitPop(WORD registers)
+ {
+ _ASSERTE(registers != 0);
+ _ASSERTE((registers & 0xc000) != 0xc000); // Popping PC and LR together undefined
+
+ // pop {registers}
+ if (CountBits(registers) == 1)
+ {
+ // Encoding T3 (exactly one register, high or low)
+ WORD reg = 15;
+ while ((registers & (WORD)(1 << reg)) == 0)
+ {
+ reg--;
+ }
+ Emit16(0xf85d);
+ Emit16(0x0b04 | (reg << 12));
+ }
+ else if ((registers & 0x7f00) == 0)
+ {
+ // Encoding T1 (low registers plus maybe PC)
+ Emit16(0xbc00 | (registers & thumbRegPc.Mask() ? 0x0100: 0x0000) | (registers & 0x00ff));
+ }
+ else
+ {
+ // Encoding T2 (two or more registers, high or low)
+ Emit16(0xe8bd);
+ Emit16(registers);
+ }
+ }
+
+ void ThumbEmitLoadVFPSingleRegIndirect(ThumbVFPSingleReg dest, ThumbReg source, int offset)
+ {
+ _ASSERTE((offset >= -1020) && (offset <= 1020));
+ _ASSERTE(offset%4==0);
+
+ Emit16((WORD) (0xed10 | ((offset > 0 ? 0x1: 0x0) << 7) | ((dest & 0x1) << 6) | source));
+ Emit16((WORD) (0x0a00 | ((dest & 0x1e) << 11) | (abs(offset)>>2)));
+ }
+
+ void ThumbEmitLoadVFPDoubleRegIndirect(ThumbVFPDoubleReg dest, ThumbReg source, int offset)
+ {
+ _ASSERTE((offset >= -1020) && (offset <= 1020));
+ _ASSERTE(offset%4==0);
+
+ Emit16((WORD) (0xed10 | ((offset > 0 ? 0x1: 0x0) << 7) | ((dest & 0x10) << 6) | source));
+ Emit16((WORD) (0x0b00 | ((dest & 0xf) << 12) | (abs(offset)>>2)));
+ }
+
+#ifdef FEATURE_INTERPRETER
+ void ThumbEmitStoreMultipleVFPDoubleReg(ThumbVFPDoubleReg source, ThumbReg dest, unsigned numRegs)
+ {
+ _ASSERTE((numRegs + source) <= 16);
+
+ // The third nibble is 0x8; the 0x4 bit (D) is zero because the source reg number must be less
+ // than 16 for double registers.
+ Emit16((WORD) (0xec80 | 0x80 | dest));
+ Emit16((WORD) (((source & 0xf) << 12) | 0xb00 | numRegs));
+ }
+
+ void ThumbEmitLoadMultipleVFPDoubleReg(ThumbVFPDoubleReg dest, ThumbReg source, unsigned numRegs)
+ {
+ _ASSERTE((numRegs + dest) <= 16);
+
+        // The third nibble is 0x8; the 0x4 bit (D) is zero because the destination reg number must be less
+        // than 16 for double registers.
+ Emit16((WORD) (0xec90 | 0x80 | source));
+ Emit16((WORD) (((dest & 0xf) << 12) | 0xb00 | numRegs));
+ }
+#endif // FEATURE_INTERPRETER
+
+ void EmitStubLinkFrame(TADDR pFrameVptr, int offsetOfFrame, int offsetOfTransitionBlock);
+ void EmitStubUnlinkFrame();
+
+ void ThumbEmitCondFlagJump(CodeLabel * target,UINT cond);
+
+ void ThumbEmitCondRegJump(CodeLabel *target, BOOL nonzero, ThumbReg reg);
+
+ void ThumbEmitNearJump(CodeLabel *target);
+
+ // Scratches r12.
+ void ThumbEmitCallManagedMethod(MethodDesc *pMD, bool fTailcall);
+
+ void EmitUnboxMethodStub(MethodDesc* pRealMD);
+ static UINT_PTR HashMulticastInvoke(MetaSig* pSig);
+
+ void EmitMulticastInvoke(UINT_PTR hash);
+ void EmitSecureDelegateInvoke(UINT_PTR hash);
+ void EmitShuffleThunk(struct ShuffleEntry *pShuffleEntryArray);
+#if defined(FEATURE_SHARE_GENERIC_CODE)
+ void EmitInstantiatingMethodStub(MethodDesc* pSharedMD, void* extra);
+#endif // FEATURE_SHARE_GENERIC_CODE
+
+ static Stub * CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
+ CorInfoHelperTailCallSpecialHandling flags);
+
+private:
+ void ThumbCopyOneTailCallArg(UINT * pnSrcAlign, const ArgLocDesc * pArgLoc, UINT * pcbStackSpace);
+ void ThumbEmitCallWithGenericInstantiationParameter(MethodDesc *pMD, void *pHiddenArg);
+};
+
+extern "C" void SinglecastDelegateInvokeStub();
+
+// SEH info forward declarations
+
+inline BOOL IsUnmanagedValueTypeReturnedByRef(UINT sizeofvaluetype)
+{
+ LIMITED_METHOD_CONTRACT;
+
+    // Structures that don't fit in a machine word are returned
+    // by reference.
+ return (sizeofvaluetype > 4);
+}
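+
+// For example, an 8-byte struct is returned by reference (TRUE), while a
+// 4-byte struct is returned in r0 (FALSE).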
+
+struct DECLSPEC_ALIGN(4) UMEntryThunkCode
+{
+ WORD m_code[4];
+
+ TADDR m_pTargetCode;
+ TADDR m_pvSecretParam;
+
+ void Encode(BYTE* pTargetCode, void* pvSecretParam);
+
+ LPCBYTE GetEntryPoint() const
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return (LPCBYTE)((TADDR)this | THUMB_CODE);
+ }
+
+ static int GetEntryPointOffset()
+ {
+ LIMITED_METHOD_CONTRACT;
+
+ return 0;
+ }
+};
+
+struct HijackArgs
+{
+ union
+ {
+ DWORD R0;
+        size_t ReturnValue[1]; // This may not hold the whole return value when it is wider than
+                               // 32 bits or is returned in a VFP register, but that is fine since
+                               // it is only consumed by OnHijackWorker().
+ };
+
+ //
+ // Non-volatile Integer registers
+ //
+ DWORD R4;
+ DWORD R5;
+ DWORD R6;
+ DWORD R7;
+ DWORD R8;
+ DWORD R9;
+ DWORD R10;
+ DWORD R11;
+
+ union
+ {
+ DWORD Lr;
+ size_t ReturnAddress;
+ };
+};
+
+// ClrFlushInstructionCache is used when we want to call FlushInstructionCache
+// for a specific architecture in the common code, but not for other architectures.
+// On IA64 ClrFlushInstructionCache calls the Kernel FlushInstructionCache function
+// to flush the instruction cache.
+// We call ClrFlushInstructionCache whenever we create or modify code in the heap.
+// Currently ClrFlushInstructionCache has no effect on X86.
+//
+
+inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
+{
+#ifdef CROSSGEN_COMPILE
+    // The code won't be executed when we are cross-compiling, so flushing the instruction cache is unnecessary.
+ return TRUE;
+#else
+ return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
+#endif
+}
+
+#ifndef FEATURE_IMPLICIT_TLS
+//
+// JIT HELPER ALIASING FOR PORTABILITY.
+//
+// Create alias for optimized implementations of helpers provided on this platform
+//
+// optimized static helpers
+#define JIT_GetSharedGCStaticBase JIT_GetSharedGCStaticBase_InlineGetAppDomain
+#define JIT_GetSharedNonGCStaticBase JIT_GetSharedNonGCStaticBase_InlineGetAppDomain
+#define JIT_GetSharedGCStaticBaseNoCtor JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain
+#define JIT_GetSharedNonGCStaticBaseNoCtor JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain
+
+#endif
+
+#ifndef FEATURE_PAL
+#define JIT_Stelem_Ref JIT_Stelem_Ref
+#endif
+
+//------------------------------------------------------------------------
+//
+// Precode definitions
+//
+//------------------------------------------------------------------------
+//
+// Note: If you introduce a new precode implementation below, please
+// update PrecodeStubManager::CheckIsStub_Internal to account for it.
+
+EXTERN_C VOID STDCALL PrecodeFixupThunk();
+
+#define PRECODE_ALIGNMENT CODE_SIZE_ALIGN
+#define SIZEOF_PRECODE_BASE CODE_SIZE_ALIGN
+#define OFFSETOF_PRECODE_TYPE 0
+
+// Invalid precode type
+struct InvalidPrecode {
+ static const int Type = 0;
+};
+
+struct StubPrecode {
+
+ static const int Type = 0xdf;
+
+ // ldr r12, [pc, #8] ; =m_pMethodDesc
+ // ldr pc, [pc, #0] ; =m_pTarget
+ // dcd pTarget
+ // dcd pMethodDesc
+ WORD m_rgCode[4];
+ TADDR m_pTarget;
+ TADDR m_pMethodDesc;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pTarget;
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ return (TADDR)InterlockedCompareExchange(
+ (LONG*)&m_pTarget, (LONG)target, (LONG)expected) == expected;
+ }
+
+#ifdef FEATURE_PREJIT
+ void Fixup(DataImage *image);
+#endif
+};
+typedef DPTR(StubPrecode) PTR_StubPrecode;
+
+
+struct NDirectImportPrecode {
+
+ static const int Type = 0xe0;
+
+ // ldr r12, [pc, #4] ; =m_pMethodDesc
+ // ldr pc, [pc, #4] ; =m_pTarget
+ // dcd pMethodDesc
+ // dcd pTarget
+ WORD m_rgCode[4];
+ TADDR m_pMethodDesc; // Notice that the fields are reversed compared to StubPrecode. Precode::GetType
+ // takes advantage of this to detect NDirectImportPrecode.
+ TADDR m_pTarget;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pTarget;
+ }
+
+ LPVOID GetEntrypoint()
+ {
+ LIMITED_METHOD_CONTRACT;
+ return (LPVOID)(dac_cast<TADDR>(this) + THUMB_CODE);
+ }
+
+#ifdef FEATURE_PREJIT
+ void Fixup(DataImage *image);
+#endif
+};
+typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode;
+
+
+struct FixupPrecode {
+
+ static const int Type = 0xfc;
+
+ // mov r12, pc
+ // ldr pc, [pc, #4] ; =m_pTarget
+ // dcb m_MethodDescChunkIndex
+ // dcb m_PrecodeChunkIndex
+ // dcd m_pTarget
+ WORD m_rgCode[3];
+ BYTE m_MethodDescChunkIndex;
+ BYTE m_PrecodeChunkIndex;
+ TADDR m_pTarget;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0);
+
+ TADDR GetBase()
+ {
+ LIMITED_METHOD_CONTRACT;
+ SUPPORTS_DAC;
+
+ return dac_cast<TADDR>(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode);
+ }
+
+ TADDR GetMethodDesc();
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pTarget;
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ return (TADDR)InterlockedCompareExchange(
+ (LONG*)&m_pTarget, (LONG)target, (LONG)expected) == expected;
+ }
+
+ static BOOL IsFixupPrecodeByASM(PCODE addr)
+ {
+ PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(addr));
+
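+        // Match the first three halfwords of m_rgCode above:
+        //   0x46fc        = mov r12, pc
+        //   0xf8df 0xf004 = ldr pc, [pc, #4]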
+ return
+ (pInstr[0] == 0x46fc) &&
+ (pInstr[1] == 0xf8df) &&
+ (pInstr[2] == 0xf004);
+ }
+
+#ifdef FEATURE_PREJIT
+ // Partial initialization. Used to save regrouped chunks.
+ void InitForSave(int iPrecodeChunkIndex);
+
+ void Fixup(DataImage *image, MethodDesc * pMD);
+#endif
+
+#ifdef DACCESS_COMPILE
+ void EnumMemoryRegions(CLRDataEnumMemoryFlags flags);
+#endif
+};
+typedef DPTR(FixupPrecode) PTR_FixupPrecode;
+
+
+// Precode to shuffle this and retbuf for closed delegates over static methods with return buffer
+struct ThisPtrRetBufPrecode {
+
+ static const int Type = 0x84;
+
+ // mov r12, r0
+ // mov r0, r1
+ // mov r1, r12
+ // ldr pc, [pc, #0] ; =m_pTarget
+ // dcd pTarget
+ // dcd pMethodDesc
+ WORD m_rgCode[6];
+ TADDR m_pTarget;
+ TADDR m_pMethodDesc;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pTarget;
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pTarget);
+ return FastInterlockCompareExchange((LONG*)&m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected;
+ }
+};
+typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode;
+
+
+#ifdef HAS_REMOTING_PRECODE
+
+// Precode with embedded remoting interceptor
+struct RemotingPrecode {
+
+ static const int Type = 0x02;
+
+ // push {r1,lr}
+ // ldr r1, [pc, #16] ; =m_pPrecodeRemotingThunk
+ // blx r1
+ // pop {r1,lr}
+ // ldr pc, [pc, #12] ; =m_pLocalTarget
+ // nop ; padding for alignment
+ // dcd m_pMethodDesc
+ // dcd m_pPrecodeRemotingThunk
+ // dcd m_pLocalTarget
+ WORD m_rgCode[8];
+ TADDR m_pMethodDesc;
+ TADDR m_pPrecodeRemotingThunk;
+ TADDR m_pLocalTarget;
+
+ void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL);
+
+ TADDR GetMethodDesc()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pMethodDesc;
+ }
+
+ PCODE GetTarget()
+ {
+ LIMITED_METHOD_DAC_CONTRACT;
+ return m_pLocalTarget;
+ }
+
+ BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+ {
+ CONTRACTL
+ {
+ THROWS;
+ GC_TRIGGERS;
+ }
+ CONTRACTL_END;
+
+ EnsureWritableExecutablePages(&m_pLocalTarget);
+ return FastInterlockCompareExchange((LONG*)&m_pLocalTarget, (LONG)target, (LONG)expected) == (LONG)expected;
+ }
+
+#ifdef FEATURE_PREJIT
+ void Fixup(DataImage *image, ZapNode *pCodeNode);
+#endif
+};
+typedef DPTR(RemotingPrecode) PTR_RemotingPrecode;
+
+EXTERN_C void PrecodeRemotingThunk();
+
+#endif // HAS_REMOTING_PRECODE
+
+//**********************************************************************
+// Miscellaneous
+//**********************************************************************
+
+// Given the first halfword value of an ARM (Thumb) instruction (which is either an entire
+// 16-bit instruction, or the high-order halfword of a 32-bit instruction), determine how many bytes
+// the instruction is (2 or 4) and return that.
+inline size_t GetARMInstructionLength(WORD instr)
+{
+ // From the ARM Architecture Reference Manual, A6.1 "Thumb instruction set encoding":
+ // If bits [15:11] of the halfword being decoded take any of the following values, the halfword is the first
+ // halfword of a 32-bit instruction:
+ // 0b11101
+ // 0b11110
+ // 0b11111
+ // Otherwise, the halfword is a 16-bit instruction.
+ if ((instr & 0xf800) > 0xe000)
+ {
+ return 4;
+ }
+ else
+ {
+ return 2;
+ }
+}
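+
+// For example, GetARMInstructionLength(0xbf00 /* nop */) returns 2, while
+// GetARMInstructionLength(0xf8d0 /* first halfword of a 32-bit ldr */)
+// returns 4 because its bits [15:11] are 0b11111.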
+
+// Given the address of an ARM (Thumb) instruction, determine how many bytes
+// the instruction is (2 or 4) and return that.
+inline size_t GetARMInstructionLength(PBYTE pInstr)
+{
+ return GetARMInstructionLength(*(WORD*)pInstr);
+}
+
+EXTERN_C void FCallMemcpy(BYTE* dest, BYTE* src, int len);
+
+#endif // __cgencpu_h__