From 381ca0894a01e9bd83ab39d6163c947531051e17 Mon Sep 17 00:00:00 2001 From: Ruben Ayrapetyan Date: Wed, 3 May 2017 17:01:07 +0300 Subject: [PATCH 04/32] Change relocations in ngen-ed code with PC-relative constants for Linux ARM32. --- src/inc/corinfo.h | 15 +++++++++++++++ src/inc/corjit.h | 6 ++++++ src/inc/zapper.h | 2 ++ src/jit/codegen.h | 4 ++++ src/jit/codegenarm.cpp | 12 ++++-------- src/jit/codegencommon.cpp | 47 +++++++++++++++++++++++++++++++++++++++++++++++ src/jit/codegenlegacy.cpp | 9 +++------ src/jit/emit.cpp | 23 +++++++++++++++++++++++ src/jit/emit.h | 5 +++++ src/jit/emitarm.cpp | 4 ++-- src/jit/instr.cpp | 3 +-- src/jit/jitee.h | 12 ++++++++++++ src/zap/zapinfo.cpp | 19 +++++++++++++++++++ src/zap/zapper.cpp | 18 ++++++++++++++---- src/zap/zaprelocs.cpp | 17 +++++++++++++++++ 15 files changed, 174 insertions(+), 22 deletions(-) mode change 100755 => 100644 src/jit/codegen.h diff --git a/src/inc/corinfo.h b/src/inc/corinfo.h index 97f3958..2495de2 100644 --- a/src/inc/corinfo.h +++ b/src/inc/corinfo.h @@ -3074,4 +3074,19 @@ public: #define IMAGE_REL_BASED_REL32 0x10 #define IMAGE_REL_BASED_THUMB_BRANCH24 0x13 +// The identifier for ARM32-specific PC-relative address +// computation corresponds to the following instruction +// sequence: +// l0: movw rX, #imm_lo // 4 byte +// l4: movt rX, #imm_hi // 4 byte +// l8: add rX, pc <- after this instruction rX = relocTarget +// +// Program counter at l8 is address of l8 + 4 +// Address of relocated movw/movt is l0 +// So, imm should be calculated as the following: +// imm = relocTarget - (l8 + 4) = relocTarget - (l0 + 8 + 4) = relocTarget - (l0 + 12) +// So, the value of offset correction is 12 +// +#define IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL 0x14 + #endif // _COR_INFO_H_ diff --git a/src/inc/corjit.h b/src/inc/corjit.h index e6d067c..e6e8257 100644 --- a/src/inc/corjit.h +++ b/src/inc/corjit.h @@ -148,6 +148,12 @@ public: CORJIT_FLAG_DESKTOP_QUIRKS = 38, // The JIT should generate
desktop-quirk-compatible code CORJIT_FLAG_TIER0 = 39, // This is the initial tier for tiered compilation which should generate code as quickly as possible CORJIT_FLAG_TIER1 = 40, // This is the final tier (for now) for tiered compilation which should generate high quality code + +#if defined(_TARGET_ARM_) + CORJIT_FLAG_RELATIVE_CODE_RELOCS = 41, // JIT should generate PC-relative address computations instead of EE relocation records +#else // !defined(_TARGET_ARM_) + CORJIT_FLAG_UNUSED11 = 41 +#endif // !defined(_TARGET_ARM_) }; CORJIT_FLAGS() diff --git a/src/inc/zapper.h b/src/inc/zapper.h index a55ddbe..b846274 100644 --- a/src/inc/zapper.h +++ b/src/inc/zapper.h @@ -448,6 +448,8 @@ class ZapperOptions bool m_fNoMetaData; // Do not copy metadata and IL to native image + void SetCompilerFlags(void); + ZapperOptions(); ~ZapperOptions(); }; diff --git a/src/jit/codegen.h b/src/jit/codegen.h old mode 100755 new mode 100644 index e50e640..471434c --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -361,6 +361,10 @@ protected: /* IN OUT */ bool* pUnwindStarted, bool jmpEpilog); + void genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg); + void genMov32RelocatableDataLabel(unsigned value, regNumber reg); + void genMov32RelocatableImmediate(emitAttr size, unsigned value, regNumber reg); + bool genUsedPopToReturn; // True if we use the pop into PC to return, // False if we didn't and must branch to LR to return. diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 40371e3..8f98343 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -44,8 +44,7 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) // Load the address where the finally funclet should return into LR. // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do the return. 
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); - getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); + genMov32RelocatableDisplacement(bbFinallyRet, REG_LR); // Jump to the finally BB inst_JMP(EJ_jmp, block->bbJumpDest); @@ -63,8 +62,7 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block) // genEHCatchRet: void CodeGen::genEHCatchRet(BasicBlock* block) { - getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET); - getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_INTRET); + genMov32RelocatableDisplacement(block->bbJumpDest, REG_INTRET); } //------------------------------------------------------------------------ @@ -82,8 +80,7 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, if (EA_IS_RELOC(size)) { - getEmitter()->emitIns_R_I(INS_movw, size, reg, imm); - getEmitter()->emitIns_R_I(INS_movt, size, reg, imm); + genMov32RelocatableImmediate(size, imm, reg); } else if (imm == 0) { @@ -681,8 +678,7 @@ void CodeGen::genJumpTable(GenTree* treeNode) getEmitter()->emitDataGenEnd(); - getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, treeNode->gtRegNum); - getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, treeNode->gtRegNum); + genMov32RelocatableDataLabel(jmpTabBase, treeNode->gtRegNum); genProduceReg(treeNode); } diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 50f43fa..9613e4d 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -6380,6 +6380,53 @@ void CodeGen::genFreeLclFrame(unsigned frameSize, /* IN OUT */ bool* pUnwindStar /*----------------------------------------------------------------------------- * + * Move of relocatable displacement value to register + */ +void CodeGen::genMov32RelocatableDisplacement(BasicBlock* block, regNumber reg) +{ + getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block, reg); + 
getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block, reg); + + if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) + { + getEmitter()->emitIns_R_R_R(INS_add, EA_4BYTE_DSP_RELOC, reg, reg, REG_PC); + } +} + +/*----------------------------------------------------------------------------- + * + * Move of relocatable data-label to register + */ +void CodeGen::genMov32RelocatableDataLabel(unsigned value, regNumber reg) +{ + getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, value, reg); + getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, value, reg); + + if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) + { + getEmitter()->emitIns_R_R_R(INS_add, EA_HANDLE_CNS_RELOC, reg, reg, REG_PC); + } +} + +/*----------------------------------------------------------------------------- + * + * Move of relocatable immediate to register + */ +void CodeGen::genMov32RelocatableImmediate(emitAttr size, unsigned value, regNumber reg) +{ + _ASSERTE(EA_IS_RELOC(size)); + + getEmitter()->emitIns_R_I(INS_movw, size, reg, value); + getEmitter()->emitIns_R_I(INS_movt, size, reg, value); + + if (compiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) + { + getEmitter()->emitIns_R_R_R(INS_add, size, reg, reg, REG_PC); + } +} + +/*----------------------------------------------------------------------------- + * * Returns register mask to push/pop to allocate a small stack frame, * instead of using "sub sp" / "add sp". Returns RBM_NONE if either frame size * is zero, or if we should use "sub sp" / "add sp" instead of push/pop. diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp index b8a239a..178be54 100644 --- a/src/jit/codegenlegacy.cpp +++ b/src/jit/codegenlegacy.cpp @@ -13095,8 +13095,7 @@ void CodeGen::genCodeForBBlist() // Load the address where the finally funclet should return into LR. // The funclet prolog/epilog will do "push {lr}" / "pop {pc}" to do // the return. 
- getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); - getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, bbFinallyRet, REG_LR); + genMov32RelocatableDisplacement(bbFinallyRet, REG_LR); regTracker.rsTrackRegTrash(REG_LR); #endif // 0 @@ -13123,8 +13122,7 @@ void CodeGen::genCodeForBBlist() case BBJ_EHCATCHRET: // set r0 to the address the VM should return to after the catch - getEmitter()->emitIns_R_L(INS_movw, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); - getEmitter()->emitIns_R_L(INS_movt, EA_4BYTE_DSP_RELOC, block->bbJumpDest, REG_R0); + genMov32RelocatableDisplacement(block->bbJumpDest, REG_R0); regTracker.rsTrackRegTrash(REG_R0); __fallthrough; @@ -15509,8 +15507,7 @@ void CodeGen::genTableSwitch(regNumber reg, unsigned jumpCnt, BasicBlock** jumpT // Pick any register except the index register. // regNumber regTabBase = regSet.rsGrabReg(RBM_ALLINT & ~genRegMask(reg)); - getEmitter()->emitIns_R_D(INS_movw, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase); - getEmitter()->emitIns_R_D(INS_movt, EA_HANDLE_CNS_RELOC, jmpTabBase, regTabBase); + genMov32RelocatableDataLabel(jmpTabBase, regTabBase); regTracker.rsTrackRegTrash(regTabBase); // LDR PC, [regTableBase + reg * 4] (encoded as LDR PC, [regTableBase, reg, LSL 2] diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index d2aa29f..29f79f0 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -7103,6 +7103,29 @@ void emitter::emitRecordRelocation(void* location, /* IN */ #endif // defined(LATE_DISASM) } +#ifdef _TARGET_ARM_ +/***************************************************************************** + * A helper for handling a Thumb-Mov32 of position-independent (PC-relative) value + * + * This routine either records relocation for the location with the EE, + * or creates a virtual relocation entry to perform offset fixup during + * compilation without recording it with EE - depending on which of the + * absolute/relative relocation modes is used for the code section.
+ */ +void emitter::emitHandlePCRelativeMov32(void* location, /* IN */ + void* target) /* IN */ +{ + if (emitComp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_RELATIVE_CODE_RELOCS)) + { + emitRecordRelocation(location, target, IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL); + } + else + { + emitRecordRelocation(location, target, IMAGE_REL_BASED_THUMB_MOV32); + } +} +#endif // _TARGET_ARM_ + /***************************************************************************** * A helper for recording a call site with the EE. */ diff --git a/src/jit/emit.h b/src/jit/emit.h index 5ec8a6a..a925f1f 100644 --- a/src/jit/emit.h +++ b/src/jit/emit.h @@ -2183,6 +2183,11 @@ public: WORD slotNum = 0, /* IN */ INT32 addlDelta = 0); /* IN */ +#ifdef _TARGET_ARM_ + void emitHandlePCRelativeMov32(void* location, /* IN */ + void* target); /* IN */ +#endif + void emitRecordCallSite(ULONG instrOffset, /* IN */ CORINFO_SIG_INFO* callSig, /* IN */ CORINFO_METHOD_HANDLE methodHandle); /* IN */ diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp index 2b8eb25..9ec8e07 100644 --- a/src/jit/emitarm.cpp +++ b/src/jit/emitarm.cpp @@ -5387,7 +5387,7 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) { assert(ins == INS_movt || ins == INS_movw); if ((ins == INS_movt) && emitComp->info.compMatchedVM) - emitRecordRelocation((void*)(dst - 8), (void*)distVal, IMAGE_REL_BASED_THUMB_MOV32); + emitHandlePCRelativeMov32((void*)(dst - 8), (void*)distVal); } } else @@ -6011,7 +6011,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) assert((ins == INS_movt) || (ins == INS_movw)); dst += emitOutput_Thumb2Instr(dst, code); if ((ins == INS_movt) && emitComp->info.compMatchedVM) - emitRecordRelocation((void*)(dst - 8), (void*)imm, IMAGE_REL_BASED_THUMB_MOV32); + emitHandlePCRelativeMov32((void*)(dst - 8), (void*)imm); } else { diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp index 5bbfdde..670a709 100644 --- a/src/jit/instr.cpp +++ b/src/jit/instr.cpp @@ -3915,8 +3915,7 @@ 
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, if (EA_IS_RELOC(size)) { - getEmitter()->emitIns_R_I(INS_movw, size, reg, imm); - getEmitter()->emitIns_R_I(INS_movt, size, reg, imm); + genMov32RelocatableImmediate(size, imm, reg); } else if (arm_Valid_Imm_For_Mov(imm)) { diff --git a/src/jit/jitee.h b/src/jit/jitee.h index 7b0e4a0..7a03dd6 100644 --- a/src/jit/jitee.h +++ b/src/jit/jitee.h @@ -80,6 +80,12 @@ public: JIT_FLAG_DESKTOP_QUIRKS = 38, // The JIT should generate desktop-quirk-compatible code JIT_FLAG_TIER0 = 39, // This is the initial tier for tiered compilation which should generate code as quickly as possible JIT_FLAG_TIER1 = 40, // This is the final tier (for now) for tiered compilation which should generate high quality code + +#if defined(_TARGET_ARM_) + JIT_FLAG_RELATIVE_CODE_RELOCS = 41, // JIT should generate PC-relative address computations instead of EE relocation records +#else // !defined(_TARGET_ARM_) + JIT_FLAG_UNUSED11 = 41 +#endif // !defined(_TARGET_ARM_) }; // clang-format on @@ -192,6 +198,12 @@ public: FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_TIER0, JIT_FLAG_TIER0); FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_TIER1, JIT_FLAG_TIER1); +#if defined(_TARGET_ARM_) + + FLAGS_EQUAL(CORJIT_FLAGS::CORJIT_FLAG_RELATIVE_CODE_RELOCS, JIT_FLAG_RELATIVE_CODE_RELOCS); + +#endif // _TARGET_ARM_ + #undef FLAGS_EQUAL } diff --git a/src/zap/zapinfo.cpp b/src/zap/zapinfo.cpp index 40d14ae..60e03af 100644 --- a/src/zap/zapinfo.cpp +++ b/src/zap/zapinfo.cpp @@ -2481,7 +2481,25 @@ void ZapInfo::recordRelocation(void *location, void *target, #if defined(_TARGET_ARM_) case IMAGE_REL_BASED_THUMB_MOV32: + case IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL: case IMAGE_REL_BASED_THUMB_BRANCH24: + +# ifdef _DEBUG + { + CORJIT_FLAGS jitFlags = m_zapper->m_pOpt->m_compilerFlags; + + if (jitFlags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_RELATIVE_CODE_RELOCS)) + { + _ASSERTE(fRelocType == IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL + || fRelocType == 
IMAGE_REL_BASED_THUMB_BRANCH24); + } + else + { + _ASSERTE(fRelocType == IMAGE_REL_BASED_THUMB_MOV32 + || fRelocType == IMAGE_REL_BASED_THUMB_BRANCH24); + } + } +# endif // _DEBUG break; #endif @@ -2584,6 +2602,7 @@ void ZapInfo::recordRelocation(void *location, void *target, #if defined(_TARGET_ARM_) case IMAGE_REL_BASED_THUMB_MOV32: + case IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL: PutThumb2Mov32((UINT16 *)location, targetOffset); break; diff --git a/src/zap/zapper.cpp b/src/zap/zapper.cpp index 4d1330e..6b45bc2 100644 --- a/src/zap/zapper.cpp +++ b/src/zap/zapper.cpp @@ -278,8 +278,7 @@ ZapperOptions::ZapperOptions() : m_legacyMode(false) ,m_fNoMetaData(s_fNGenNoMetaData) { - m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_RELOC); - m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_PREJIT); + SetCompilerFlags(); m_zapSet = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_ZapSet); if (m_zapSet != NULL && wcslen(m_zapSet) > 3) @@ -319,6 +318,18 @@ ZapperOptions::~ZapperOptions() delete [] m_repositoryDir; } +void ZapperOptions::SetCompilerFlags(void) +{ + m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_RELOC); + m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_PREJIT); + +#if defined(_TARGET_ARM_) +# if defined(PLATFORM_UNIX) + m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_RELATIVE_CODE_RELOCS); +# endif // defined(PLATFORM_UNIX) +#endif // defined(_TARGET_ARM_) +} + /* --------------------------------------------------------------------------- * * Zapper class * --------------------------------------------------------------------------- */ @@ -370,8 +381,7 @@ Zapper::Zapper(NGenOptions *pOptions, bool fromDllHost) pOptions = &currentVersionOptions; zo->m_compilerFlags.Reset(); - zo->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_RELOC); - zo->m_compilerFlags.Set(CORJIT_FLAGS::CORJIT_FLAG_PREJIT); + zo->SetCompilerFlags(); zo->m_autodebug = true; if (pOptions->fDebug) diff --git a/src/zap/zaprelocs.cpp b/src/zap/zaprelocs.cpp index 04708c2..059d9a5 100644 --- a/src/zap/zaprelocs.cpp +++
b/src/zap/zaprelocs.cpp @@ -84,6 +84,22 @@ void ZapBaseRelocs::WriteReloc(PVOID pSrc, int offset, ZapNode * pTarget, int ta break; } + case IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL: + { + TADDR pSite = (TADDR)m_pImage->GetBaseAddress() + rva; + + // For details about how the value is calculated, see + // description of IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL + const UINT32 offsetCorrection = 12; + + UINT32 imm32 = pActualTarget - (pSite + offsetCorrection); + + PutThumb2Mov32((UINT16 *)pLocation, imm32); + + // IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL does not need base reloc entry + return; + } + case IMAGE_REL_BASED_THUMB_BRANCH24: { TADDR pSite = (TADDR)m_pImage->GetBaseAddress() + rva; @@ -282,6 +298,7 @@ void ZapBlobWithRelocs::Save(ZapWriter * pZapWriter) #if defined(_TARGET_ARM_) case IMAGE_REL_BASED_THUMB_MOV32: + case IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL: targetOffset = (int)GetThumb2Mov32((UINT16 *)pLocation); break; -- 2.7.4