Diffstat (limited to 'src/vm/arm/cgencpu.h')
-rw-r--r-- | src/vm/arm/cgencpu.h | 1338 |
1 file changed, 1338 insertions, 0 deletions
diff --git a/src/vm/arm/cgencpu.h b/src/vm/arm/cgencpu.h
new file mode 100644
index 0000000000..936fdabafb
--- /dev/null
+++ b/src/vm/arm/cgencpu.h
@@ -0,0 +1,1338 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+//
+
+
+#ifndef _TARGET_ARM_
+#error Should only include "cGenCpu.h" for ARM builds
+#endif
+
+#ifndef __cgencpu_h__
+#define __cgencpu_h__
+
+#include "utilcode.h"
+#include "tls.h"
+
+// Preferred alignment for data.
+#define DATA_ALIGNMENT 4
+
+#define DISPATCH_STUB_FIRST_WORD 0xf8d0
+#define RESOLVE_STUB_FIRST_WORD 0xf8d0
+
+class MethodDesc;
+class FramedMethodFrame;
+class Module;
+struct DeclActionInfo;
+class ComCallMethodDesc;
+class BaseDomain;
+class ZapNode;
+struct ArgLocDesc;
+
+#define USE_REDIRECT_FOR_GCSTRESS
+
+// CPU-dependent functions
+Stub * GenerateInitPInvokeFrameHelper();
+
+EXTERN_C void checkStack(void);
+
+#ifdef CROSSGEN_COMPILE
+#define GetEEFuncEntryPoint(pfn) 0x1001
+#else
+#define GetEEFuncEntryPoint(pfn) GFN_TADDR(pfn)
+#endif
+
+//**********************************************************************
+
+#define COMMETHOD_PREPAD 12   // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc)
+#ifdef FEATURE_COMINTEROP
+#define COMMETHOD_CALL_PRESTUB_SIZE 12
+#define COMMETHOD_CALL_PRESTUB_ADDRESS_OFFSET 8   // the offset of the call target address inside the prestub
+#endif // FEATURE_COMINTEROP
+
+#define STACK_ALIGN_SIZE 4
+
+#define JUMP_ALLOCATE_SIZE 8                // # bytes to allocate for a jump instruction
+#define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 8   // # bytes to allocate for a back-to-back jump instruction
+
+//#define HAS_COMPACT_ENTRYPOINTS 1
+
+#define HAS_NDIRECT_IMPORT_PRECODE 1
+
+#define USE_INDIRECT_CODEHEADER
+
+#ifdef FEATURE_REMOTING
+#define HAS_REMOTING_PRECODE 1
+#endif
+
+EXTERN_C void getFPReturn(int fpSize, INT64 *pRetVal);
+EXTERN_C void setFPReturn(int fpSize, INT64 retVal);
+
+#define HAS_FIXUP_PRECODE 1
+#define HAS_FIXUP_PRECODE_CHUNKS 1
+
+// ThisPtrRetBufPrecode is necessary for closed delegates over static methods with a return buffer.
+#define HAS_THISPTR_RETBUF_PRECODE 1
+
+#define CODE_SIZE_ALIGN 4
+#define CACHE_LINE_SIZE 32   // Assume a 32-byte cache line on this target.
+#define LOG2SLOT LOG2_PTRSIZE
+
+#define ENREGISTERED_RETURNTYPE_MAXSIZE 32          // bytes (maximum HFA size is 4 doubles)
+#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 4   // bytes
+
+#define CALLDESCR_ARGREGS 1     // CallDescrWorker has an ArgumentRegisters parameter
+#define CALLDESCR_FPARGREGS 1   // CallDescrWorker has a FloatArgumentRegisters parameter
+
+// Max size of optimized TLS helpers
+#define TLS_GETTER_MAX_SIZE 0x10
+
+// Given a return address retrieved during a stackwalk,
+// this is the offset by which it should be decremented to arrive at the callsite.
+#define STACKWALK_CONTROLPC_ADJUST_OFFSET 2
+
+//=======================================================================
+// IMPORTANT: This value is used to figure out how much to allocate
+// for a fixed array of FieldMarshalers. That means it must be at least
+// as large as the largest FieldMarshaler subclass. This requirement
+// is guarded by an assert.
+//=======================================================================
+#define MAXFIELDMARSHALERSIZE 24
+
+//**********************************************************************
+// Parameter size
+//**********************************************************************
+
+typedef INT32 StackElemType;
+#define STACK_ELEM_SIZE sizeof(StackElemType)
+
+// !! This expression assumes STACK_ELEM_SIZE is a power of 2.
+#define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((ULONG)(STACK_ELEM_SIZE - 1)))
+
+//**********************************************************************
+// Frames
+//**********************************************************************
+
+//--------------------------------------------------------------------
+// This represents the callee-saved (non-volatile) registers saved as
+// of a FramedMethodFrame.
+//--------------------------------------------------------------------
+typedef DPTR(struct CalleeSavedRegisters) PTR_CalleeSavedRegisters;
+struct CalleeSavedRegisters {
+    INT32 r4, r5, r6, r7, r8, r9, r10;
+    INT32 r11; // frame pointer
+    INT32 r14; // link register
+};
+
+//--------------------------------------------------------------------
+// This represents the arguments that are stored in volatile registers.
+// This should not overlap the CalleeSavedRegisters since those are already
+// saved separately and it would be wasteful to save the same register twice.
+// If we do use a non-volatile register as an argument, then the ArgIterator
+// will probably have to communicate this back to the PromoteCallerStack
+// routine to avoid a double promotion.
+//--------------------------------------------------------------------
+typedef DPTR(struct ArgumentRegisters) PTR_ArgumentRegisters;
+struct ArgumentRegisters {
+    INT32 r[4]; // r0, r1, r2, r3
+};
+#define NUM_ARGUMENT_REGISTERS 4
+
+//--------------------------------------------------------------------
+// This represents the floating point argument registers which are saved
+// as part of the NegInfo for a FramedMethodFrame. Note that these
+// might not be saved by all stubs: typically only those that call into
+// C++ helpers will need to preserve the values in these volatile
+// registers.
+//--------------------------------------------------------------------
+typedef DPTR(struct FloatArgumentRegisters) PTR_FloatArgumentRegisters;
+struct FloatArgumentRegisters {
+    union
+    {
+        float  s[16]; // s0-s15
+        double d[8];  // d0-d7
+    };
+};
+
+// forward decl
+struct REGDISPLAY;
+typedef REGDISPLAY *PREGDISPLAY;
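To make the rounding above concrete: with STACK_ELEM_SIZE == 4, StackElemSize rounds any parameter size up to the next 4-byte stack slot. A minimal standalone sketch of that arithmetic (the typedef and macro are repeated here only so the snippet compiles on its own):

    // Standalone check of the StackElemSize rounding behavior.
    #include <assert.h>

    typedef int StackElemType;   // mirrors "typedef INT32 StackElemType"
    #define STACK_ELEM_SIZE sizeof(StackElemType)
    #define StackElemSize(parmSize) (((parmSize) + STACK_ELEM_SIZE - 1) & ~((unsigned long)(STACK_ELEM_SIZE - 1)))

    int main()
    {
        assert(StackElemSize(1) == 4);   // a single byte still occupies a full slot
        assert(StackElemSize(4) == 4);   // exact multiples are unchanged
        assert(StackElemSize(6) == 8);   // 6 rounds up to the next 4-byte boundary
        return 0;
    }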
+// Sufficient context for Try/Catch restoration.
+struct EHContext {
+    INT32 r[16]; // note: includes r15 (pc)
+    void Setup(PCODE resumePC, PREGDISPLAY regs);
+
+    inline TADDR GetSP() {
+        LIMITED_METHOD_CONTRACT;
+        return (TADDR)r[13];
+    }
+    inline void SetSP(LPVOID esp) {
+        LIMITED_METHOD_CONTRACT;
+        r[13] = (INT32)(size_t)esp;
+    }
+
+    inline LPVOID GetFP() {
+        LIMITED_METHOD_CONTRACT;
+        return (LPVOID)(UINT_PTR)r[11];
+    }
+
+    inline void SetArg(LPVOID arg) {
+        LIMITED_METHOD_CONTRACT;
+        r[0] = (INT32)(size_t)arg;
+    }
+};
+
+#define ARGUMENTREGISTERS_SIZE sizeof(ArgumentRegisters)
+
+//**********************************************************************
+// Exception handling
+//**********************************************************************
+
+inline PCODE GetIP(const T_CONTEXT * context) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    return PCODE(context->Pc);
+}
+
+inline void SetIP(T_CONTEXT *context, PCODE eip) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    context->Pc = DWORD(eip);
+}
+
+inline TADDR GetSP(const T_CONTEXT * context) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    return TADDR(context->Sp);
+}
+
+inline PCODE GetLR(const T_CONTEXT * context) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    return PCODE(context->Lr);
+}
+
+extern "C" LPVOID __stdcall GetCurrentSP();
+
+inline void SetSP(T_CONTEXT *context, TADDR esp) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    context->Sp = DWORD(esp);
+}
+
+inline void SetFP(T_CONTEXT *context, TADDR ebp) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    context->R11 = DWORD(ebp);
+}
+
+inline TADDR GetFP(const T_CONTEXT * context)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+    return (TADDR)(context->R11);
+}
+
+inline void ClearITState(T_CONTEXT *context) {
+    LIMITED_METHOD_DAC_CONTRACT;
+    context->Cpsr = context->Cpsr & 0xf9ff03ff;
+}
+
+#ifdef FEATURE_COMINTEROP
+void emitCOMStubCall (ComCallMethodDesc *pCOMMethod, PCODE target);
+#endif // FEATURE_COMINTEROP
+
+//------------------------------------------------------------------------
+inline void emitJump(LPBYTE pBuffer, LPVOID target)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // The PC-relative load we emit below requires 4-byte alignment for the offset to be calculated correctly.
+    _ASSERTE(((UINT_PTR)pBuffer & 3) == 0);
+
+    DWORD * pCode = (DWORD *)pBuffer;
+
+    // ldr pc, [pc, #0]
+    pCode[0] = 0xf000f8df;
+    pCode[1] = (DWORD)target;
+}
+
+//------------------------------------------------------------------------
+// Given the same pBuffer that was used by emitJump, this method
+// decodes the instructions and returns the jump target.
+inline PCODE decodeJump(PCODE pCode)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    TADDR pInstr = PCODEToPINSTR(pCode);
+
+    return *dac_cast<PTR_PCODE>(pInstr + sizeof(DWORD));
+}
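The relationship between emitJump and decodeJump is easiest to see as a memory layout: the thunk is two 32-bit words, an `ldr pc, [pc, #0]` followed by the absolute target, and decodeJump simply reads the second word back. A sketch with a hypothetical target address:

    // Sketch of the 8-byte jump thunk written by emitJump; the target
    // value below is hypothetical and chosen only for illustration.
    #include <assert.h>
    #include <stdint.h>

    int main()
    {
        uint32_t thunk[2];                // 8 bytes, 4-byte aligned
        uint32_t target = 0x00401001;     // hypothetical Thumb target address

        thunk[0] = 0xf000f8df;            // ldr pc, [pc, #0] (Thumb-2 halfword pair)
        thunk[1] = target;                // literal word read by the load

        // decodeJump's job: skip the instruction word and fetch the literal.
        assert(*(uint32_t *)((uint8_t *)thunk + sizeof(uint32_t)) == target);
        return 0;
    }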
+
+//
+// On IA64, back-to-back jumps should be separated by a nop bundle to get
+// the best performance from the hardware's branch prediction logic.
+// For all other platforms back-to-back jumps don't require anything special.
+// That is why we have these wrapper functions that call emitJump and decodeJump.
+//
+
+//------------------------------------------------------------------------
+inline BOOL isJump(PCODE pCode)
+{
+    LIMITED_METHOD_DAC_CONTRACT;
+
+    TADDR pInstr = PCODEToPINSTR(pCode);
+
+    return *dac_cast<PTR_DWORD>(pInstr) == 0xf000f8df;
+}
+
+//------------------------------------------------------------------------
+inline BOOL isBackToBackJump(PCODE pBuffer)
+{
+    WRAPPER_NO_CONTRACT;
+    SUPPORTS_DAC;
+    return isJump(pBuffer);
+}
+
+//------------------------------------------------------------------------
+inline void emitBackToBackJump(LPBYTE pBuffer, LPVOID target)
+{
+    WRAPPER_NO_CONTRACT;
+    emitJump(pBuffer, target);
+}
+
+//------------------------------------------------------------------------
+inline PCODE decodeBackToBackJump(PCODE pBuffer)
+{
+    WRAPPER_NO_CONTRACT;
+    return decodeJump(pBuffer);
+}
+
+//----------------------------------------------------------------------
+#include "stublink.h"
+struct ArrayOpScript;
+
+#define THUMB_CODE 1
+
+inline BOOL IsThumbCode(PCODE pCode)
+{
+    return (pCode & THUMB_CODE) != 0;
+}
+
+struct ThumbReg
+{
+    int reg;
+    ThumbReg(int reg) : reg(reg)
+    {
+        _ASSERTE(0 <= reg && reg < 16);
+    }
+
+    operator int ()
+    {
+        return reg;
+    }
+
+    int operator == (ThumbReg other)
+    {
+        return reg == other.reg;
+    }
+
+    int operator != (ThumbReg other)
+    {
+        return reg != other.reg;
+    }
+
+    WORD Mask() const
+    {
+        return 1 << reg;
+    }
+};
+
+struct ThumbCond
+{
+    int cond;
+    ThumbCond(int cond) : cond(cond)
+    {
+        _ASSERTE(0 <= cond && cond < 16);
+    }
+};
+
+struct ThumbVFPSingleReg
+{
+    int reg;
+    ThumbVFPSingleReg(int reg) : reg(reg)
+    {
+        _ASSERTE(0 <= reg && reg < 31);
+    }
+
+    operator int ()
+    {
+        return reg;
+    }
+
+    int operator == (ThumbVFPSingleReg other)
+    {
+        return reg == other.reg;
+    }
+
+    int operator != (ThumbVFPSingleReg other)
+    {
+        return reg != other.reg;
+    }
+
+    WORD Mask() const
+    {
+        return 1 << reg;
+    }
+};
+
+struct ThumbVFPDoubleReg
+{
+    int reg;
+    ThumbVFPDoubleReg(int reg) : reg(reg)
+    {
+        _ASSERTE(0 <= reg && reg < 31);
+    }
+
+    operator int ()
+    {
+        return reg;
+    }
+
+    int operator == (ThumbVFPDoubleReg other)
+    {
+        return reg == other.reg;
+    }
+
+    int operator != (ThumbVFPDoubleReg other)
+    {
+        return reg != other.reg;
+    }
+
+    WORD Mask() const
+    {
+        return 1 << reg;
+    }
+};
+
+const ThumbReg thumbRegFp = ThumbReg(11);
+const ThumbReg thumbRegSp = ThumbReg(13);
+const ThumbReg thumbRegLr = ThumbReg(14);
+const ThumbReg thumbRegPc = ThumbReg(15);
+
+const ThumbCond thumbCondEq = ThumbCond(0);
+const ThumbCond thumbCondNe = ThumbCond(1);
+const ThumbCond thumbCondCs = ThumbCond(2);
+const ThumbCond thumbCondCc = ThumbCond(3);
+const ThumbCond thumbCondMi = ThumbCond(4);
+const ThumbCond thumbCondPl = ThumbCond(5);
+const ThumbCond thumbCondVs = ThumbCond(6);
+const ThumbCond thumbCondVc = ThumbCond(7);
+const ThumbCond thumbCondHi = ThumbCond(8);
+const ThumbCond thumbCondLs = ThumbCond(9);
+const ThumbCond thumbCondGe = ThumbCond(10);
+const ThumbCond thumbCondLt = ThumbCond(11);
+const ThumbCond thumbCondGt = ThumbCond(12);
+const ThumbCond thumbCondLe = ThumbCond(13);
+const ThumbCond thumbCondAl = ThumbCond(14);
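The Mask() helpers exist so that register lists for push/pop can be built by OR-ing single-bit masks, which is exactly what the prolog emitter below does. A sketch of that computation for a hypothetical `push {r4-r7, lr}`:

    // How the Mask() values combine into the register list consumed by
    // ThumbEmitPush/ThumbEmitPop; mirrors the loop in ThumbEmitProlog.
    #include <assert.h>
    #include <stdint.h>

    int main()
    {
        uint16_t lrMask = 1 << 14;      // thumbRegLr.Mask()
        uint16_t wRegisters = lrMask;

        // Save r4-r7 plus LR, as a prolog with cCalleeSavedRegs == 4 would.
        for (int i = 4; i < 8; i++)
            wRegisters |= (uint16_t)(1 << i);

        assert(wRegisters == 0x40f0);   // bits 4-7 and bit 14 set
        return 0;
    }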
+
+class StubLinkerCPU : public StubLinker
+{
+public:
+    static void Init();
+
+    void ThumbEmitProlog(UINT cCalleeSavedRegs, UINT cbStackFrame, BOOL fPushArgRegs)
+    {
+        _ASSERTE(!m_fProlog);
+
+        // Record the parameters of this prolog so that we can generate a
+        // matching epilog and unwind info.
+        DescribeProlog(cCalleeSavedRegs, cbStackFrame, fPushArgRegs);
+
+        // Trivial prologs (which is all that we support initially) consist of between one and three
+        // instructions.
+
+        // 1) Push argument registers. This is all or nothing (if we push, we push R0-R3).
+        if (fPushArgRegs)
+        {
+            // push {r0-r3}
+            ThumbEmitPush(ThumbReg(0).Mask() | ThumbReg(1).Mask() | ThumbReg(2).Mask() | ThumbReg(3).Mask());
+        }
+
+        // 2) Push callee-saved registers. We always start pushing at R4, and only save consecutive registers
+        //    from there (max is R11). Additionally we always assume LR is saved for these types of prolog.
+        // push {r4-rX,lr}
+        WORD wRegisters = thumbRegLr.Mask();
+        for (unsigned int i = 4; i < (4 + cCalleeSavedRegs); i++)
+            wRegisters |= ThumbReg(i).Mask();
+        ThumbEmitPush(wRegisters);
+
+        // 3) Reserve space on the stack for the rest of the frame.
+        if (cbStackFrame)
+        {
+            // sub sp, #cbStackFrame
+            ThumbEmitSubSp(cbStackFrame);
+        }
+    }
+
+    void ThumbEmitEpilog()
+    {
+        // Generate an epilog matching a prolog generated by ThumbEmitProlog.
+        _ASSERTE(m_fProlog);
+
+        // If additional stack space for a frame was allocated, remove it now.
+        if (m_cbStackFrame)
+        {
+            // add sp, #m_cbStackFrame
+            ThumbEmitAddSp(m_cbStackFrame);
+        }
+
+        // Pop callee-saved registers (we always have at least LR). If no argument registers were saved then
+        // we can restore LR back into PC and we're done. Otherwise LR needs to be restored into LR.
+        // pop {r4-rX,lr|pc}
+        WORD wRegisters = m_fPushArgRegs ? thumbRegLr.Mask() : thumbRegPc.Mask();
+        for (unsigned int i = 4; i < (4 + m_cCalleeSavedRegs); i++)
+            wRegisters |= ThumbReg(i).Mask();
+        ThumbEmitPop(wRegisters);
+
+        if (!m_fPushArgRegs)
+            return;
+
+        // We pushed the argument registers. These aren't restored, but we need to reclaim the stack space.
+        // add sp, #16
+        ThumbEmitAddSp(16);
+
+        // Return. The return address has been restored into LR at this point.
+        // bx lr
+        ThumbEmitJumpRegister(thumbRegLr);
+    }
+
+    void ThumbEmitGetThread(TLSACCESSMODE mode, ThumbReg dest);
+
+    void ThumbEmitNop()
+    {
+        // nop
+        Emit16(0xbf00);
+    }
+
+    void ThumbEmitBreakpoint()
+    {
+        // Permanently undefined instruction #0xfe (see ARMv7-A A6.2.6). The debugger seems to accept this as
+        // a reasonable breakpoint substitute (it's what DebugBreak uses). Bkpt #0, on the other hand, always
+        // seems to flow directly to the kernel debugger (even if we ignore it there it doesn't seem to be
+        // picked up by the user mode debugger).
+        Emit16(0xdefe);
+    }
+
+    void ThumbEmitMovConstant(ThumbReg dest, int constant)
+    {
+        _ASSERT(dest != thumbRegPc);
+
+        // Emit a 2-byte instruction when dest reg < 8 and 0 <= constant < 256.
+        if (dest <= 7 && constant < 256 && constant >= 0)
+        {
+            Emit16((WORD)(0x2000 | dest << 8 | (WORD)constant));
+        }
+        else // emit 4-byte instructions
+        {
+            WORD wConstantLow = (WORD)(constant & 0xffff);
+            WORD wConstantHigh = (WORD)(constant >> 16);
+
+            // movw regDest, #wConstantLow
+            Emit16((WORD)(0xf240 | (wConstantLow >> 12) | ((wConstantLow & 0x0800) ? 0x0400 : 0x0000)));
+            Emit16((WORD)((dest << 8) | (((wConstantLow >> 8) & 0x0007) << 12) | (wConstantLow & 0x00ff)));
+
+            if (wConstantHigh)
+            {
+                // movt regDest, #wConstantHigh
+                Emit16((WORD)(0xf2c0 | (wConstantHigh >> 12) | ((wConstantHigh & 0x0800) ? 0x0400 : 0x0000)));
+                Emit16((WORD)((dest << 8) | (((wConstantHigh >> 8) & 0x0007) << 12) | (wConstantHigh & 0x00ff)));
+            }
+        }
+    }
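For a full 32-bit constant, ThumbEmitMovConstant emits a movw/movt pair, and the halfword packing can be checked in isolation. A sketch that reproduces the same bit manipulation for a hypothetical constant:

    // Standalone reproduction of the movw/movt halfword packing used by
    // ThumbEmitMovConstant; the register and constant are illustrative.
    #include <assert.h>
    #include <stdint.h>

    static void EncodeMovwMovt(int dest, uint32_t constant, uint16_t out[4])
    {
        uint16_t lo = (uint16_t)(constant & 0xffff);
        uint16_t hi = (uint16_t)(constant >> 16);

        out[0] = (uint16_t)(0xf240 | (lo >> 12) | ((lo & 0x0800) ? 0x0400 : 0x0000));
        out[1] = (uint16_t)((dest << 8) | (((lo >> 8) & 0x0007) << 12) | (lo & 0x00ff));
        out[2] = (uint16_t)(0xf2c0 | (hi >> 12) | ((hi & 0x0800) ? 0x0400 : 0x0000));
        out[3] = (uint16_t)((dest << 8) | (((hi >> 8) & 0x0007) << 12) | (hi & 0x00ff));
    }

    int main()
    {
        uint16_t code[4];
        EncodeMovwMovt(0 /* r0 */, 0x12345678, code);
        // movw r0, #0x5678 ; movt r0, #0x1234
        assert(code[0] == 0xf245 && code[1] == 0x6078);
        assert(code[2] == 0xf2c1 && code[3] == 0x2034);
        return 0;
    }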
+    void ThumbEmitLoadRegIndirect(ThumbReg dest, ThumbReg source, int offset)
+    {
+        _ASSERTE((offset >= 0) && (offset <= 4095));
+
+        // ldr regDest, [regSource + #offset]
+        if ((dest < 8) && (source < 8) && ((offset & 0x3) == 0) && (offset < 125))
+        {
+            // Encoding T1
+            Emit16((WORD)(0x6800 | ((offset >> 2) << 6) | (source << 3) | dest));
+        }
+        else
+        {
+            // Encoding T3
+            Emit16((WORD)(0xf8d0 | source));
+            Emit16((WORD)((dest << 12) | offset));
+        }
+    }
+
+    void ThumbEmitLoadIndirectPostIncrement(ThumbReg dest, ThumbReg source, int offset)
+    {
+        _ASSERTE((offset >= 0) && (offset <= 255));
+
+        // ldr regDest, [regSource], #offset
+        Emit16((WORD)(0xf850 | source));
+        Emit16((WORD)(0x0b00 | (dest << 12) | offset));
+    }
+
+    void ThumbEmitStoreRegIndirect(ThumbReg source, ThumbReg dest, int offset)
+    {
+        _ASSERTE((offset >= -255) && (offset <= 4095));
+
+        // str regSource, [regDest + #offset]
+        if (offset < 0)
+        {
+            Emit16((WORD)(0xf840 | dest));
+            Emit16((WORD)(0x0C00 | (source << 12) | (UINT8)(-offset)));
+        }
+        else if ((dest < 8) && (source < 8) && ((offset & 0x3) == 0) && (offset < 125))
+        {
+            // Encoding T1
+            Emit16((WORD)(0x6000 | ((offset >> 2) << 6) | (dest << 3) | source));
+        }
+        else
+        {
+            // Encoding T3
+            Emit16((WORD)(0xf8c0 | dest));
+            Emit16((WORD)((source << 12) | offset));
+        }
+    }
+
+    void ThumbEmitStoreIndirectPostIncrement(ThumbReg source, ThumbReg dest, int offset)
+    {
+        _ASSERTE((offset >= 0) && (offset <= 255));
+
+        // str regSource, [regDest], #offset
+        Emit16((WORD)(0xf840 | dest));
+        Emit16((WORD)(0x0b00 | (source << 12) | offset));
+    }
+
+    void ThumbEmitLoadOffsetScaledReg(ThumbReg dest, ThumbReg base, ThumbReg offset, int shift)
+    {
+        _ASSERTE(shift >= 0 && shift <= 3);
+
+        Emit16((WORD)(0xf850 | base));
+        Emit16((WORD)((dest << 12) | (shift << 4) | offset));
+    }
+
+    void ThumbEmitCallRegister(ThumbReg target)
+    {
+        // blx regTarget
+        Emit16((WORD)(0x4780 | (target << 3)));
+    }
+
+    void ThumbEmitJumpRegister(ThumbReg target)
+    {
+        // bx regTarget
+        Emit16((WORD)(0x4700 | (target << 3)));
+    }
+
+    void ThumbEmitMovRegReg(ThumbReg dest, ThumbReg source)
+    {
+        // mov regDest, regSource
+        Emit16((WORD)(0x4600 | ((dest > 7) ? 0x0080 : 0x0000) | (source << 3) | (dest & 0x0007)));
+    }
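ThumbEmitLoadRegIndirect picks the 16-bit encoding T1 only when both registers are low and the offset is small and word-aligned; otherwise it falls back to the 32-bit encoding T3. A sketch of both paths with illustrative operands:

    // Both encoding paths of ThumbEmitLoadRegIndirect, computed by hand.
    #include <assert.h>
    #include <stdint.h>

    int main()
    {
        // ldr r0, [r1, #8]: qualifies for encoding T1 (16-bit).
        int dest = 0, source = 1, offset = 8;
        uint16_t t1 = (uint16_t)(0x6800 | ((offset >> 2) << 6) | (source << 3) | dest);
        assert(t1 == 0x6888);

        // ldr r0, [r1, #256]: too big for T1, needs encoding T3 (32-bit).
        offset = 256;
        uint16_t t3a = (uint16_t)(0xf8d0 | source);        // first halfword
        uint16_t t3b = (uint16_t)((dest << 12) | offset);  // second halfword
        assert(t3a == 0xf8d1 && t3b == 0x0100);
        return 0;
    }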
+    // Assuming SP is only subtracted in the prolog.
+    void ThumbEmitSubSp(int value)
+    {
+        _ASSERTE(value >= 0);
+        _ASSERTE((value & 0x3) == 0);
+
+        if (value < 512)
+        {
+            // Encoding T1
+            // sub sp, sp, #(value >> 2)
+            Emit16((WORD)(0xb080 | (value >> 2)));
+        }
+        else if (value < 4096)
+        {
+            // Using 32-bit encoding
+            Emit16((WORD)(0xf2ad | ((value & 0x0800) >> 1)));
+            Emit16((WORD)(0x0d00 | ((value & 0x0700) << 4) | (value & 0x00ff)));
+        }
+        else
+        {
+            // For values >= 4K (the page size) we must check for the guard page.
+
+#ifndef CROSSGEN_COMPILE
+            // mov r4, value
+            ThumbEmitMovConstant(ThumbReg(4), value);
+            // mov r12, checkStack
+            ThumbEmitMovConstant(ThumbReg(12), (int)checkStack);
+            // blx r12
+            ThumbEmitCallRegister(ThumbReg(12));
+#endif
+
+            // sub sp, sp, r4
+            Emit16((WORD)0xebad);
+            Emit16((WORD)0x0d04);
+        }
+    }
+
+    void ThumbEmitAddSp(int value)
+    {
+        _ASSERTE(value >= 0);
+        _ASSERTE((value & 0x3) == 0);
+
+        if (value < 512)
+        {
+            // Encoding T2
+            // add sp, sp, #(value >> 2)
+            Emit16((WORD)(0xb000 | (value >> 2)));
+        }
+        else if (value < 4096)
+        {
+            // Using 32-bit encoding T4
+            Emit16((WORD)(0xf20d | ((value & 0x0800) >> 1)));
+            Emit16((WORD)(0x0d00 | ((value & 0x0700) << 4) | (value & 0x00ff)));
+        }
+        else
+        {
+            // Must use a temp register for values >= 4096.
+            ThumbEmitMovConstant(ThumbReg(12), value);
+            // add sp, sp, r12
+            Emit16((WORD)0x44e5);
+        }
+    }
+
+    void ThumbEmitAddReg(ThumbReg dest, ThumbReg source)
+    {
+        _ASSERTE(dest != source);
+        Emit16((WORD)(0x4400 | ((dest & 0x8) << 4) | (source << 3) | (dest & 0x7)));
+    }
+
+    void ThumbEmitAdd(ThumbReg dest, ThumbReg source, unsigned int value)
+    {
+        if (value < 4096)
+        {
+            // addw dest, source, #value
+            unsigned int i = (value & 0x800) >> 11;
+            unsigned int imm3 = (value & 0x700) >> 8;
+            unsigned int imm8 = value & 0xff;
+            Emit16((WORD)(0xf200 | (i << 10) | source));
+            Emit16((WORD)((imm3 << 12) | (dest << 8) | imm8));
+        }
+        else
+        {
+            // If the immediate is more than 4096, only ADD (register) will work:
+            // move the immediate to the dest reg and use ADD (register).
+            // This will not work if dest is the same as source.
+            _ASSERTE(dest != source);
+            ThumbEmitMovConstant(dest, value);
+            ThumbEmitAddReg(dest, source);
+        }
+    }
+
+    void ThumbEmitSub(ThumbReg dest, ThumbReg source, unsigned int value)
+    {
+        _ASSERTE(value < 4096);
+
+        // subw dest, source, #value
+        unsigned int i = (value & 0x800) >> 11;
+        unsigned int imm3 = (value & 0x700) >> 8;
+        unsigned int imm8 = value & 0xff;
+        Emit16((WORD)(0xf2a0 | (i << 10) | source));
+        Emit16((WORD)((imm3 << 12) | (dest << 8) | imm8));
+    }
+
+    void ThumbEmitCmpReg(ThumbReg reg1, ThumbReg reg2)
+    {
+        if (reg1 < 8 && reg2 < 8)
+        {
+            Emit16((WORD)(0x4280 | reg2 << 3 | reg1));
+        }
+        else
+        {
+            _ASSERTE(reg1 != ThumbReg(15) && reg2 != ThumbReg(15));
+            Emit16((WORD)(0x4500 | reg2 << 3 | (reg1 & 0x7) | (reg1 & 0x8 ? 0x80 : 0x0)));
+        }
+    }
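ThumbEmitSubSp thus has three regimes, and the last one exists because growing the stack by a page or more must touch the guard page first (hence the call out to checkStack). A sketch that just classifies a few hypothetical frame sizes against those thresholds:

    // Classify frame sizes against the thresholds in ThumbEmitSubSp;
    // the enum names are illustrative, not from the original header.
    #include <assert.h>

    enum SubSpPath { SUBSP_16BIT, SUBSP_32BIT, SUBSP_PROBE };

    static SubSpPath SubSpStrategy(int cbFrame)
    {
        if (cbFrame < 512)  return SUBSP_16BIT;   // sub sp, sp, #imm (16-bit form)
        if (cbFrame < 4096) return SUBSP_32BIT;   // 32-bit immediate form
        return SUBSP_PROBE;                       // checkStack, then sub sp, sp, r4
    }

    int main()
    {
        assert(SubSpStrategy(64)   == SUBSP_16BIT);
        assert(SubSpStrategy(1024) == SUBSP_32BIT);
        assert(SubSpStrategy(8192) == SUBSP_PROBE);
        return 0;
    }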
+    void ThumbEmitIncrement(ThumbReg dest, unsigned int value)
+    {
+        while (value)
+        {
+            if (value >= 4095)
+            {
+                // addw <dest>, <dest>, #4095
+                ThumbEmitAdd(dest, dest, 4095);
+                value -= 4095;
+            }
+            else if (value <= 255)
+            {
+                // add <dest>, #value
+                Emit16((WORD)(0x3000 | (dest << 8) | value));
+                break;
+            }
+            else
+            {
+                // addw <dest>, <dest>, #value
+                ThumbEmitAdd(dest, dest, value);
+                break;
+            }
+        }
+    }
+
+    void ThumbEmitPush(WORD registers)
+    {
+        _ASSERTE(registers != 0);
+        _ASSERTE((registers & 0xa000) == 0); // Pushing SP or PC is undefined.
+
+        // push {registers}
+        if (CountBits(registers) == 1)
+        {
+            // Encoding T3 (exactly one register, high or low)
+            WORD reg = 15;
+            while ((registers & (WORD)(1 << reg)) == 0)
+            {
+                reg--;
+            }
+            Emit16(0xf84d);
+            Emit16(0x0d04 | (reg << 12));
+        }
+        else if ((registers & 0xbf00) == 0)
+        {
+            // Encoding T1 (low registers plus maybe LR)
+            Emit16(0xb400 | (registers & thumbRegLr.Mask() ? 0x0100 : 0x0000) | (registers & 0x00ff));
+        }
+        else
+        {
+            // Encoding T2 (two or more registers, high or low)
+            Emit16(0xe92d);
+            Emit16(registers);
+        }
+    }
+
+    void ThumbEmitLoadStoreMultiple(ThumbReg base, bool load, WORD registers)
+    {
+        _ASSERTE(CountBits(registers) > 1);
+        _ASSERTE((registers & 0xFF00) == 0);      // This only supports the small encoding.
+        _ASSERTE(base < 8);                       // This only supports the small encoding.
+        _ASSERTE((base.Mask() & registers) == 0); // This only supports the small encoding.
+
+        // (LDM|STM) base, {registers}
+        WORD flag = load ? 0x0800 : 0;
+        Emit16(0xc000 | flag | ((base & 7) << 8) | (registers & 0xFF));
+    }
+
+    void ThumbEmitPop(WORD registers)
+    {
+        _ASSERTE(registers != 0);
+        _ASSERTE((registers & 0xc000) != 0xc000); // Popping PC and LR together is undefined.
+
+        // pop {registers}
+        if (CountBits(registers) == 1)
+        {
+            // Encoding T3 (exactly one register, high or low)
+            WORD reg = 15;
+            while ((registers & (WORD)(1 << reg)) == 0)
+            {
+                reg--;
+            }
+            Emit16(0xf85d);
+            Emit16(0x0b04 | (reg << 12));
+        }
+        else if ((registers & 0x7f00) == 0)
+        {
+            // Encoding T1 (low registers plus maybe PC)
+            Emit16(0xbc00 | (registers & thumbRegPc.Mask() ? 0x0100 : 0x0000) | (registers & 0x00ff));
+        }
+        else
+        {
+            // Encoding T2 (two or more registers, high or low)
+            Emit16(0xe8bd);
+            Emit16(registers);
+        }
+    }
+
+    void ThumbEmitLoadVFPSingleRegIndirect(ThumbVFPSingleReg dest, ThumbReg source, int offset)
+    {
+        _ASSERTE((offset >= -1020) && (offset <= 1020));
+        _ASSERTE((offset % 4) == 0);
+
+        Emit16((WORD)(0xed10 | ((offset > 0 ? 0x1 : 0x0) << 7) | ((dest & 0x1) << 6) | source));
+        Emit16((WORD)(0x0a00 | ((dest & 0x1e) << 11) | (abs(offset) >> 2)));
+    }
+
+    void ThumbEmitLoadVFPDoubleRegIndirect(ThumbVFPDoubleReg dest, ThumbReg source, int offset)
+    {
+        _ASSERTE((offset >= -1020) && (offset <= 1020));
+        _ASSERTE((offset % 4) == 0);
+
+        Emit16((WORD)(0xed10 | ((offset > 0 ? 0x1 : 0x0) << 7) | ((dest & 0x10) << 6) | source));
+        Emit16((WORD)(0x0b00 | ((dest & 0xf) << 12) | (abs(offset) >> 2)));
+    }
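ThumbEmitPush's encoding selection above can likewise be verified by hand; for low registers plus LR the 16-bit encoding T1 applies. A sketch for a hypothetical `push {r4-r7, lr}`:

    // Hand-computed check of the T1 path in ThumbEmitPush.
    #include <assert.h>
    #include <stdint.h>

    int main()
    {
        uint16_t lr = 1 << 14;
        uint16_t registers = (uint16_t)(lr | 0x00f0);   // push {r4-r7, lr}

        // Low registers plus maybe LR: the 16-bit encoding T1 applies.
        assert((registers & 0xbf00) == 0);
        uint16_t t1 = (uint16_t)(0xb400 | ((registers & lr) ? 0x0100 : 0x0000)
                                        | (registers & 0x00ff));
        assert(t1 == 0xb5f0);   // the canonical push {r4-r7,lr} encoding
        return 0;
    }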
+#ifdef FEATURE_INTERPRETER
+    void ThumbEmitStoreMultipleVFPDoubleReg(ThumbVFPDoubleReg source, ThumbReg dest, unsigned numRegs)
+    {
+        _ASSERTE((numRegs + source) <= 16);
+
+        // The third nibble is 0x8; the 0x4 bit (D) is zero because the source reg number must be less
+        // than 16 for double registers.
+        Emit16((WORD)(0xec80 | 0x80 | dest));
+        Emit16((WORD)(((source & 0xf) << 12) | 0xb00 | numRegs));
+    }
+
+    void ThumbEmitLoadMultipleVFPDoubleReg(ThumbVFPDoubleReg dest, ThumbReg source, unsigned numRegs)
+    {
+        _ASSERTE((numRegs + dest) <= 16);
+
+        // The third nibble is 0x8; the 0x4 bit (D) is zero because the source reg number must be less
+        // than 16 for double registers.
+        Emit16((WORD)(0xec90 | 0x80 | source));
+        Emit16((WORD)(((dest & 0xf) << 12) | 0xb00 | numRegs));
+    }
+#endif // FEATURE_INTERPRETER
+
+    void EmitStubLinkFrame(TADDR pFrameVptr, int offsetOfFrame, int offsetOfTransitionBlock);
+    void EmitStubUnlinkFrame();
+
+    void ThumbEmitCondFlagJump(CodeLabel * target, UINT cond);
+
+    void ThumbEmitCondRegJump(CodeLabel *target, BOOL nonzero, ThumbReg reg);
+
+    void ThumbEmitNearJump(CodeLabel *target);
+
+    // Scratches r12.
+    void ThumbEmitCallManagedMethod(MethodDesc *pMD, bool fTailcall);
+
+    void EmitUnboxMethodStub(MethodDesc* pRealMD);
+    static UINT_PTR HashMulticastInvoke(MetaSig* pSig);
+
+    void EmitMulticastInvoke(UINT_PTR hash);
+    void EmitSecureDelegateInvoke(UINT_PTR hash);
+    void EmitShuffleThunk(struct ShuffleEntry *pShuffleEntryArray);
+#if defined(FEATURE_SHARE_GENERIC_CODE)
+    void EmitInstantiatingMethodStub(MethodDesc* pSharedMD, void* extra);
+#endif // FEATURE_SHARE_GENERIC_CODE
+
+    static Stub * CreateTailCallCopyArgsThunk(CORINFO_SIG_INFO * pSig,
+                                              CorInfoHelperTailCallSpecialHandling flags);
+
+private:
+    void ThumbCopyOneTailCallArg(UINT * pnSrcAlign, const ArgLocDesc * pArgLoc, UINT * pcbStackSpace);
+    void ThumbEmitCallWithGenericInstantiationParameter(MethodDesc *pMD, void *pHiddenArg);
+};
+
+extern "C" void SinglecastDelegateInvokeStub();
+
+// SEH info forward declarations
+
+inline BOOL IsUnmanagedValueTypeReturnedByRef(UINT sizeofvaluetype)
+{
+    LIMITED_METHOD_CONTRACT;
+
+    // Structures that don't fit in a machine word are returned by reference.
+    return (sizeofvaluetype > 4);
+}
+
+struct DECLSPEC_ALIGN(4) UMEntryThunkCode
+{
+    WORD m_code[4];
+
+    TADDR m_pTargetCode;
+    TADDR m_pvSecretParam;
+
+    void Encode(BYTE* pTargetCode, void* pvSecretParam);
+
+    LPCBYTE GetEntryPoint() const
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        return (LPCBYTE)((TADDR)this | THUMB_CODE);
+    }
+
+    static int GetEntryPointOffset()
+    {
+        LIMITED_METHOD_CONTRACT;
+
+        return 0;
+    }
+};
+
+struct HijackArgs
+{
+    union
+    {
+        DWORD R0;
+        size_t ReturnValue[1]; // This may not be the return value when the return is wider than 32 bits
+                               // or the return value is in a VFP reg, but it works for us since
+                               // this is only used by OnHijackWorker().
+    };
+
+    //
+    // Non-volatile integer registers
+    //
+    DWORD R4;
+    DWORD R5;
+    DWORD R6;
+    DWORD R7;
+    DWORD R8;
+    DWORD R9;
+    DWORD R10;
+    DWORD R11;
+
+    union
+    {
+        DWORD Lr;
+        size_t ReturnAddress;
+    };
+};
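GetEntryPoint in UMEntryThunkCode above returns the thunk's address with the low bit set; on ARM, bit 0 of a code pointer selects Thumb state, which is what the THUMB_CODE constant encodes. A sketch with a hypothetical thunk address:

    // The THUMB_CODE bit-0 tagging used by GetEntryPoint and IsThumbCode.
    #include <assert.h>
    #include <stdint.h>

    int main()
    {
        uintptr_t thunk = 0x00402000;             // hypothetical thunk address
        uintptr_t entry = thunk | 1;              // tag with THUMB_CODE

        assert((entry & 1) != 0);                 // what IsThumbCode tests
        assert((entry & ~(uintptr_t)1) == thunk); // what PCODEToPINSTR recovers
        return 0;
    }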
+// ClrFlushInstructionCache is used when we want to call FlushInstructionCache
+// for a specific architecture in the common code, but not for other architectures.
+// On IA64 ClrFlushInstructionCache calls the kernel FlushInstructionCache function
+// to flush the instruction cache.
+// We call ClrFlushInstructionCache whenever we create or modify code in the heap.
+// Currently ClrFlushInstructionCache has no effect on X86.
+//
+
+inline BOOL ClrFlushInstructionCache(LPCVOID pCodeAddr, size_t sizeOfCode)
+{
+#ifdef CROSSGEN_COMPILE
+    // The code won't be executed when we are cross-compiling, so flushing the instruction cache is unnecessary.
+    return TRUE;
+#else
+    return FlushInstructionCache(GetCurrentProcess(), pCodeAddr, sizeOfCode);
+#endif
+}
+
+#ifndef FEATURE_IMPLICIT_TLS
+//
+// JIT HELPER ALIASING FOR PORTABILITY.
+//
+// Create aliases for the optimized implementations of helpers provided on this platform.
+//
+// Optimized static helpers
+#define JIT_GetSharedGCStaticBase           JIT_GetSharedGCStaticBase_InlineGetAppDomain
+#define JIT_GetSharedNonGCStaticBase        JIT_GetSharedNonGCStaticBase_InlineGetAppDomain
+#define JIT_GetSharedGCStaticBaseNoCtor     JIT_GetSharedGCStaticBaseNoCtor_InlineGetAppDomain
+#define JIT_GetSharedNonGCStaticBaseNoCtor  JIT_GetSharedNonGCStaticBaseNoCtor_InlineGetAppDomain
+
+#endif
+
+#ifndef FEATURE_PAL
+#define JIT_Stelem_Ref JIT_Stelem_Ref
+#endif
+
+//------------------------------------------------------------------------
+//
+// Precode definitions
+//
+//------------------------------------------------------------------------
+//
+// Note: If you introduce a new precode implementation below, then please
+// update PrecodeStubManager::CheckIsStub_Internal to account for it.
+
+EXTERN_C VOID STDCALL PrecodeFixupThunk();
+
+#define PRECODE_ALIGNMENT     CODE_SIZE_ALIGN
+#define SIZEOF_PRECODE_BASE   CODE_SIZE_ALIGN
+#define OFFSETOF_PRECODE_TYPE 0
+
+// Invalid precode type
+struct InvalidPrecode {
+    static const int Type = 0;
+};
+
+struct StubPrecode {
+
+    static const int Type = 0xdf;
+
+    // ldr r12, [pc, #8] ; =m_pMethodDesc
+    // ldr pc, [pc, #0]  ; =m_pTarget
+    // dcd pTarget
+    // dcd pMethodDesc
+    WORD  m_rgCode[4];
+    TADDR m_pTarget;
+    TADDR m_pMethodDesc;
+
+    void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+    TADDR GetMethodDesc()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pMethodDesc;
+    }
+
+    PCODE GetTarget()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pTarget;
+    }
+
+    BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        return (TADDR)InterlockedCompareExchange(
+            (LONG*)&m_pTarget, (LONG)target, (LONG)expected) == expected;
+    }
+
+#ifdef FEATURE_PREJIT
+    void Fixup(DataImage *image);
+#endif
+};
+typedef DPTR(StubPrecode) PTR_StubPrecode;
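The StubPrecode comment describes a fixed layout: four Thumb halfwords followed by two data words, so the PC-relative loads find m_pTarget at offset 8 and m_pMethodDesc at offset 12. A sketch that models that layout (a 4-byte TADDR is an assumption of this sketch, matching the 32-bit target):

    // Model of the StubPrecode layout under a 32-bit TADDR.
    #include <assert.h>
    #include <stddef.h>
    #include <stdint.h>

    typedef uint32_t TADDR32;   // stand-in for TADDR on 32-bit ARM
    struct StubPrecodeLayout
    {
        uint16_t m_rgCode[4];   // ldr r12, [pc, #8] / ldr pc, [pc, #0]
        TADDR32  m_pTarget;     // word read by the second ldr
        TADDR32  m_pMethodDesc; // word read by the first ldr
    };

    int main()
    {
        assert(sizeof(StubPrecodeLayout) == 16);
        assert(offsetof(StubPrecodeLayout, m_pTarget) == 8);
        assert(offsetof(StubPrecodeLayout, m_pMethodDesc) == 12);
        return 0;
    }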
+
+
+struct NDirectImportPrecode {
+
+    static const int Type = 0xe0;
+
+    // ldr r12, [pc, #4] ; =m_pMethodDesc
+    // ldr pc, [pc, #4]  ; =m_pTarget
+    // dcd pMethodDesc
+    // dcd pTarget
+    WORD  m_rgCode[4];
+    TADDR m_pMethodDesc; // Notice that the fields are reversed compared to StubPrecode. Precode::GetType
+                         // takes advantage of this to detect NDirectImportPrecode.
+    TADDR m_pTarget;
+
+    void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+    TADDR GetMethodDesc()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pMethodDesc;
+    }
+
+    PCODE GetTarget()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pTarget;
+    }
+
+    LPVOID GetEntrypoint()
+    {
+        LIMITED_METHOD_CONTRACT;
+        return (LPVOID)(dac_cast<TADDR>(this) + THUMB_CODE);
+    }
+
+#ifdef FEATURE_PREJIT
+    void Fixup(DataImage *image);
+#endif
+};
+typedef DPTR(NDirectImportPrecode) PTR_NDirectImportPrecode;
+
+
+struct FixupPrecode {
+
+    static const int Type = 0xfc;
+
+    // mov r12, pc
+    // ldr pc, [pc, #4] ; =m_pTarget
+    // dcb m_MethodDescChunkIndex
+    // dcb m_PrecodeChunkIndex
+    // dcd m_pTarget
+    WORD  m_rgCode[3];
+    BYTE  m_MethodDescChunkIndex;
+    BYTE  m_PrecodeChunkIndex;
+    TADDR m_pTarget;
+
+    void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator, int iMethodDescChunkIndex = 0, int iPrecodeChunkIndex = 0);
+
+    TADDR GetBase()
+    {
+        LIMITED_METHOD_CONTRACT;
+        SUPPORTS_DAC;
+
+        return dac_cast<TADDR>(this) + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode);
+    }
+
+    TADDR GetMethodDesc();
+
+    PCODE GetTarget()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pTarget;
+    }
+
+    BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        return (TADDR)InterlockedCompareExchange(
+            (LONG*)&m_pTarget, (LONG)target, (LONG)expected) == expected;
+    }
+
+    static BOOL IsFixupPrecodeByASM(PCODE addr)
+    {
+        PTR_WORD pInstr = dac_cast<PTR_WORD>(PCODEToPINSTR(addr));
+
+        return
+            (pInstr[0] == 0x46fc) &&
+            (pInstr[1] == 0xf8df) &&
+            (pInstr[2] == 0xf004);
+    }
+
+#ifdef FEATURE_PREJIT
+    // Partial initialization. Used to save regrouped chunks.
+    void InitForSave(int iPrecodeChunkIndex);
+
+    void Fixup(DataImage *image, MethodDesc * pMD);
+#endif
+
+#ifdef DACCESS_COMPILE
+    void EnumMemoryRegions(CLRDataEnumMemoryFlags flags);
+#endif
+};
+typedef DPTR(FixupPrecode) PTR_FixupPrecode;
+
+
+// Precode to shuffle this and retbuf for closed delegates over static methods with return buffer
+struct ThisPtrRetBufPrecode {
+
+    static const int Type = 0x84;
+
+    // mov r12, r0
+    // mov r0, r1
+    // mov r1, r12
+    // ldr pc, [pc, #0] ; =m_pTarget
+    // dcd pTarget
+    // dcd pMethodDesc
+    WORD  m_rgCode[6];
+    TADDR m_pTarget;
+    TADDR m_pMethodDesc;
+
+    void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator);
+
+    TADDR GetMethodDesc()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+
+        return m_pMethodDesc;
+    }
+
+    PCODE GetTarget()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pTarget;
+    }
+
+    BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pTarget);
+        return FastInterlockCompareExchange((LONG*)&m_pTarget, (LONG)target, (LONG)expected) == (LONG)expected;
+    }
+};
+typedef DPTR(ThisPtrRetBufPrecode) PTR_ThisPtrRetBufPrecode;
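FixupPrecode::GetBase above relies on precodes in a chunk being laid out back to back, with m_PrecodeChunkIndex counting from the chunk's end, so the shared data past the last precode is reachable from any entry. A worked sketch with hypothetical addresses (the 12-byte precode size follows from the 32-bit layout assumed earlier):

    // Worked example of the GetBase() arithmetic; addresses are hypothetical.
    #include <assert.h>
    #include <stdint.h>

    enum { SIZEOF_FIXUPPRECODE = 12 };   // 3 halfwords + 2 index bytes + 4-byte target

    int main()
    {
        uintptr_t pThis = 0x00500018;    // some precode inside a chunk
        unsigned  precodeChunkIndex = 2; // two more precodes follow this one

        // GetBase(): this + (m_PrecodeChunkIndex + 1) * sizeof(FixupPrecode)
        uintptr_t base = pThis + (precodeChunkIndex + 1) * SIZEOF_FIXUPPRECODE;

        // The chunk's shared data begins just past the last precode.
        assert(base == 0x0050003c);
        return 0;
    }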
+
+
+#ifdef HAS_REMOTING_PRECODE
+
+// Precode with embedded remoting interceptor
+struct RemotingPrecode {
+
+    static const int Type = 0x02;
+
+    // push {r1,lr}
+    // ldr r1, [pc, #16] ; =m_pPrecodeRemotingThunk
+    // blx r1
+    // pop {r1,lr}
+    // ldr pc, [pc, #12] ; =m_pLocalTarget
+    // nop               ; padding for alignment
+    // dcd m_pMethodDesc
+    // dcd m_pPrecodeRemotingThunk
+    // dcd m_pLocalTarget
+    WORD  m_rgCode[8];
+    TADDR m_pMethodDesc;
+    TADDR m_pPrecodeRemotingThunk;
+    TADDR m_pLocalTarget;
+
+    void Init(MethodDesc* pMD, LoaderAllocator *pLoaderAllocator = NULL);
+
+    TADDR GetMethodDesc()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pMethodDesc;
+    }
+
+    PCODE GetTarget()
+    {
+        LIMITED_METHOD_DAC_CONTRACT;
+        return m_pLocalTarget;
+    }
+
+    BOOL SetTargetInterlocked(TADDR target, TADDR expected)
+    {
+        CONTRACTL
+        {
+            THROWS;
+            GC_TRIGGERS;
+        }
+        CONTRACTL_END;
+
+        EnsureWritableExecutablePages(&m_pLocalTarget);
+        return FastInterlockCompareExchange((LONG*)&m_pLocalTarget, (LONG)target, (LONG)expected) == (LONG)expected;
+    }
+
+#ifdef FEATURE_PREJIT
+    void Fixup(DataImage *image, ZapNode *pCodeNode);
+#endif
+};
+typedef DPTR(RemotingPrecode) PTR_RemotingPrecode;
+
+EXTERN_C void PrecodeRemotingThunk();
+
+#endif // HAS_REMOTING_PRECODE
+
+//**********************************************************************
+// Miscellaneous
+//**********************************************************************
+
+// Given the first halfword value of an ARM (Thumb) instruction (which is either an entire
+// 16-bit instruction, or the high-order halfword of a 32-bit instruction), determine how many bytes
+// the instruction is (2 or 4) and return that.
+inline size_t GetARMInstructionLength(WORD instr)
+{
+    // From the ARM Architecture Reference Manual, A6.1 "Thumb instruction set encoding":
+    // If bits [15:11] of the halfword being decoded take any of the following values, the halfword is the first
+    // halfword of a 32-bit instruction:
+    //   0b11101
+    //   0b11110
+    //   0b11111
+    // Otherwise, the halfword is a 16-bit instruction.
+    if ((instr & 0xf800) > 0xe000)
+    {
+        return 4;
+    }
+    else
+    {
+        return 2;
+    }
+}
+
+// Given a pointer to an ARM (Thumb) instruction address, determine how many bytes
+// the instruction is (2 or 4) and return that.
+inline size_t GetARMInstructionLength(PBYTE pInstr)
+{
+    return GetARMInstructionLength(*(WORD*)pInstr);
+}
+
+EXTERN_C void FCallMemcpy(BYTE* dest, BYTE* src, int len);
+
+#endif // __cgencpu_h__
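GetARMInstructionLength makes it straightforward to walk a mixed 16/32-bit Thumb-2 instruction stream. A sketch over a few hand-assembled halfwords (the sample encodings are illustrative):

    // Walking a Thumb-2 stream with the bits [15:11] length test above.
    #include <stdint.h>
    #include <stdio.h>

    static size_t InstrLen(uint16_t instr)
    {
        // 0b11101/0b11110/0b11111 in bits [15:11] mark a 32-bit instruction.
        return ((instr & 0xf800) > 0xe000) ? 4 : 2;
    }

    int main()
    {
        // push {r4-r7,lr}; movw r0, #0x5678; bx lr
        uint16_t code[] = { 0xb5f0, 0xf245, 0x6078, 0x4770 };
        for (size_t i = 0; i < sizeof(code) / sizeof(code[0]); )
        {
            size_t len = InstrLen(code[i]);   // 2 or 4 bytes
            printf("instruction at halfword %zu: %zu bytes\n", i, len);
            i += len / 2;                     // advance by halfwords
        }
        return 0;
    }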