From 45798f661f8c8c042f3582cde8b611d1c9c7343f Mon Sep 17 00:00:00 2001 From: Kyungwoo Lee Date: Fri, 29 Apr 2016 10:29:28 -0700 Subject: ARM64: Enabling Crossgen End-to-End Mscorlib Fixes https://github.com/dotnet/coreclr/issues/4350 Fixes https://github.com/dotnet/coreclr/issues/4615 This is a fairly large change across VM/Zap/JIT to properly support the crossgen scenario. 1. Fix incorrect `ldr` encoding with size. 2. Enforce JIT data following JIT code per method by allocating them together. This guarantees correct PC-relative encoding for such constant data access without fix-up. 3. For the general fix-up data access, use `adrp/add` instruction pairs with fix-ups. Two more relocation types are implemented on all sides. 4. Interface dispatch stub is now implemented which is needed for interface calls for crossgen. I've verified hello world runs with mscorlib.ni.dll. --- src/jit/codegenarm64.cpp | 9 ++-- src/jit/emit.cpp | 20 ++++++++ src/jit/emit.h | 1 + src/jit/emitarm64.cpp | 120 ++++++++++++++++++++++++++++++++++++++--------- src/jit/emitfmtsarm64.h | 2 +- src/jit/instrsarm64.h | 2 +- 6 files changed, 125 insertions(+), 29 deletions(-) (limited to 'src/jit') diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 7b1b2fa92b..bbc46db678 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2157,12 +2157,11 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, { size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs } - + if (EA_IS_RELOC(size)) { - // Emit a data section constant for a relocatable integer constant. - CORINFO_FIELD_HANDLE hnd = getEmitter()->emitLiteralConst(imm); - getEmitter()->emitIns_R_C(INS_ldr, size, reg, hnd, 0); + // This emits a pair of adrp/add (two instructions) with fix-ups. 
+ getEmitter()->emitIns_R_AI(INS_adrp, size, reg, imm); } else if (imm == 0) { @@ -2252,7 +2251,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types tar // We must load the FP constant from the constant pool // Emit a data section constant for the float or double constant. CORINFO_FIELD_HANDLE hnd = emit->emitFltOrDblConst(dblConst); - emit->emitIns_R_C(INS_ldr, size, targetReg, hnd, 0); + emit->emitIns_R_C(INS_ldr, size, targetReg, hnd, 0); } } break; diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp index b9787fa7d6..a4a20693a4 100644 --- a/src/jit/emit.cpp +++ b/src/jit/emit.cpp @@ -4497,12 +4497,32 @@ unsigned emitter::emitEndCodeGen(Compiler *comp, } #endif +#ifdef _TARGET_ARM64_ + // For arm64, we want to allocate JIT data always adjacent to code similar to what native compiler does. + // This way allows us to use a single `ldr` to access such data like float constant/jmp table. + if (emitTotalColdCodeSize > 0) + { + // JIT data might be far away from the cold code. + NYI_ARM64("Need to handle fix-up to data from cold code."); + } + + emitCmpHandle->allocMem(emitTotalHotCodeSize + emitConsDsc.dsdOffs, emitTotalColdCodeSize, + 0, + xcptnsCount, + allocMemFlag, + (void**)&codeBlock, (void**)&coldCodeBlock, + (void**)&consBlock); + + consBlock = codeBlock + emitTotalHotCodeSize; + +#else emitCmpHandle->allocMem( emitTotalHotCodeSize, emitTotalColdCodeSize, emitConsDsc.dsdOffs, xcptnsCount, allocMemFlag, (void**)&codeBlock, (void**)&coldCodeBlock, (void**)&consBlock); +#endif // if (emitConsDsc.dsdOffs) printf("Cons=%08X\n", consBlock); diff --git a/src/jit/emit.h b/src/jit/emit.h index 95dac33536..67adcdf731 100644 --- a/src/jit/emit.h +++ b/src/jit/emit.h @@ -1131,6 +1131,7 @@ protected: bool idIsDspReloc() const { assert(!idIsTiny()); return _idDspReloc != 0; } void idSetIsDspReloc(bool val = true) { assert(!idIsTiny()); _idDspReloc = val; } + bool idIsReloc() { return idIsDspReloc() || idIsCnsReloc(); } #endif diff --git a/src/jit/emitarm64.cpp 
b/src/jit/emitarm64.cpp index e4ad5c77b8..2a0bf954b6 100644 --- a/src/jit/emitarm64.cpp +++ b/src/jit/emitarm64.cpp @@ -814,7 +814,7 @@ bool emitter::emitInsMayWriteToGCReg(instrDesc *id) // These are the load/store formats with "target" registers: - case IF_LS_1A: // LS_1A .X......iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) + case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) case IF_LS_2C: // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc @@ -6314,6 +6314,9 @@ void emitter::emitIns_S_I (instruction ins, /***************************************************************************** * * Add an instruction with a register + static member operands. + * Constant is stored into JIT data which is adjacent to code. + * No relocation is needed. PC-relative offset will be encoded directly into instruction. + * */ void emitter::emitIns_R_C (instruction ins, emitAttr attr, @@ -6321,11 +6324,6 @@ void emitter::emitIns_R_C (instruction ins, CORINFO_FIELD_HANDLE fldHnd, int offs) { -#if RELOC_SUPPORT - // Static always need relocs - if (!jitStaticFldIsGlobAddr(fldHnd)) - attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG); -#endif assert(offs >= 0); assert(instrDesc::fitsInSmallCns(offs)); @@ -6350,7 +6348,6 @@ void emitter::emitIns_R_C (instruction ins, } fmt = IF_LS_1A; break; - default: break; } @@ -6369,7 +6366,6 @@ void emitter::emitIns_R_C (instruction ins, dispIns(id); appendToCurIG(id); - } @@ -6413,12 +6409,61 @@ void emitter::emitIns_R_AR (instruction ins, NYI("emitIns_R_AR"); } -void emitter::emitIns_R_AI (instruction ins, - emitAttr attr, - regNumber ireg, - ssize_t disp) +// This computes address from the immediate which is relocatable. 
+void emitter::emitIns_R_AI(instruction ins, + emitAttr attr, + regNumber ireg, + ssize_t addr) { - NYI("emitIns_R_AI"); + assert(EA_IS_RELOC(attr)); + emitAttr size = EA_SIZE(attr); + insFormat fmt = IF_DI_1E; + bool needAdd = false; + instrDescJmp* id = emitNewInstrJmp(); + + switch (ins) + { + case INS_adrp: + // This computes page address. + // page offset is needed using add. + needAdd = true; + break; + case INS_adr: + break; + default: + unreached(); + } + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + id->idOpSize(size); + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idReg1(ireg); + id->idSetIsDspReloc(); + + dispIns(id); + appendToCurIG(id); + + if (needAdd) + { + // add reg, reg, imm + ins = INS_add; + fmt = IF_DI_2A; + instrDesc* id = emitAllocInstr(attr); + assert(id->idIsReloc()); + + id->idIns(ins); + id->idInsFmt(fmt); + id->idInsOpt(INS_OPTS_NONE); + id->idOpSize(size); + id->idAddr()->iiaAddr = (BYTE*)addr; + id->idReg1(ireg); + id->idReg2(ireg); + + dispIns(id); + appendToCurIG(id); + } } void emitter::emitIns_AR_R (instruction ins, @@ -7849,6 +7894,12 @@ BYTE* emitter::emitOutputLJ(insGroup *ig, BYTE *dst, instrDesc *i dstAddr = emitDataOffsetToPtr(dataOffs); dstOffs = (unsigned) ((ssize_t) (dstAddr - srcAddr) + srcOffs); assert((dstOffs & 3) == 0); + + // Failing the following assertion means the corresponding JIT data is not within +/-1MB range + // from the current code reference. This could happen for a large method or extremely large + // amount of JIT data for the method, or access it from cold method. + // Ideally, we should detect such case earlier to expand the code sequence using a fix-up + // similar to emitIns_R_AI. 
assert(isValidSimm19(dstOffs)); } else @@ -7991,19 +8042,27 @@ BYTE* emitter::emitOutputLJ(insGroup *ig, BYTE *dst, instrDesc *i } else if (loadLabel) { - if (fmt == IF_LS_1A) // LS_1A XX......iiiiiiii iiiiiiiiiiittttt Rt simm21 + if (fmt == IF_LS_1A) // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt simm21 { // INS_ldr or INS_ldrsw (PC-Relative) // Is the target a vector register? if (isVectorRegister(id->idReg1())) - { - code &= 0x3FFFFFFF; // clear the size bits - code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX + { + code |= insEncodeDatasizeVLS(code, id->idOpSize()); // XX V code |= insEncodeReg_Vt(id->idReg1()); // ttttt } else { + assert(isGeneralRegister(id->idReg1())); + // insEncodeDatasizeLS is not quite right for this case. + // So just specialize it. + if ((ins == INS_ldr) && (id->idOpSize() == EA_8BYTE)) + { + // set the operation size in bit 30 + code |= 0x40000000; + } + code |= insEncodeReg_Rt(id->idReg1()); // ttttt } @@ -8235,7 +8294,7 @@ size_t emitter::emitOutputInstr(insGroup *ig, dst += emitOutputCall(ig, dst, id, code); break; - case IF_LS_1A: // LS_1A XX......iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) + case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) assert(insOptsNone(id->idInsOpt())); assert(id->idIsBound()); @@ -8428,9 +8487,19 @@ size_t emitter::emitOutputInstr(insGroup *ig, case IF_DI_1E: // DI_1E .ii.....iiiiiiii iiiiiiiiiiiddddd Rd simm21 assert(insOptsNone(id->idInsOpt())); - assert(id->idIsBound()); - - dst = emitOutputLJ(ig, dst, id); + if (id->idIsReloc()) + { + code = emitInsCode(ins, fmt); + code |= insEncodeReg_Rd(id->idReg1()); // ddddd + dst += emitOutput_Instr(dst, code); + emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEBASE_REL21); + } + else + { + // Local jmp/load case which does not need a relocation. 
+ assert(id->idIsBound()); + dst = emitOutputLJ(ig, dst, id); + } sz = sizeof(instrDescJmp); break; @@ -8461,6 +8530,13 @@ size_t emitter::emitOutputInstr(insGroup *ig, code |= insEncodeReg_Rd(id->idReg1()); // ddddd code |= insEncodeReg_Rn(id->idReg2()); // nnnnn dst += emitOutput_Instr(dst, code); + + if (id->idIsReloc()) + { + assert(sz == sizeof(instrDesc)); + assert(id->idAddr()->iiaAddr != nullptr); + emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A); + } break; case IF_DI_2B: // DI_2B X.........Xnnnnn ssssssnnnnnddddd Rd Rn imm(0-63) @@ -9920,7 +9996,7 @@ void emitter::emitDispIns(instrDesc * id, emitDispReg(id->idReg3(), size, false); break; - case IF_LS_1A: // LS_1A XX......iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) + case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) assert(insOptsNone(id->idInsOpt())); emitDispReg(id->idReg1(), size, true); imm = emitGetInsSC(id); diff --git a/src/jit/emitfmtsarm64.h b/src/jit/emitfmtsarm64.h index 722e48c580..06cde03f8c 100644 --- a/src/jit/emitfmtsarm64.h +++ b/src/jit/emitfmtsarm64.h @@ -121,7 +121,7 @@ IF_DEF(BI_1B, IS_NONE, JMP) // BI_1B B.......bbbbbiii IF_DEF(BR_1A, IS_NONE, CALL) // BR_1A ................ ......nnnnn..... Rn ret IF_DEF(BR_1B, IS_NONE, CALL) // BR_1B ................ ......nnnnn..... Rn br blr -IF_DEF(LS_1A, IS_NONE, JMP) // LS_1A .X......iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) +IF_DEF(LS_1A, IS_NONE, JMP) // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB) IF_DEF(LS_2A, IS_NONE, NONE) // LS_2A .X.......X...... 
......nnnnnttttt Rt Rn IF_DEF(LS_2B, IS_NONE, NONE) // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095) IF_DEF(LS_2C, IS_NONE, NONE) // LS_2C .X.......X.iiiii iiiiP.nnnnnttttt Rt Rn imm(-256..+255) pre/post inc diff --git a/src/jit/instrsarm64.h b/src/jit/instrsarm64.h index 21fddc5fe7..51ec30e0db 100644 --- a/src/jit/instrsarm64.h +++ b/src/jit/instrsarm64.h @@ -91,7 +91,7 @@ INST5(ldr, "ldr", 0,LD, IF_EN5A, 0xB9400000, 0xB9400000, 0xB8400000, // ldr Rt,[Xn+pimm12] LS_2B 1X11100101iiiiii iiiiiinnnnnttttt B940 0000 imm(0-4095<<{2,3}) // ldr Rt,[Xn+simm9] LS_2C 1X111000010iiiii iiiiPPnnnnnttttt B840 0000 [Xn imm(-256..+255) pre/post/no inc] // ldr Rt,[Xn,(Rm,ext,shl)] LS_3A 1X111000011mmmmm oooS10nnnnnttttt B860 0800 [Xn, ext(Rm) LSL {0,2,3}] - // ldr Vt/Rt,[PC+simm19<<2] LS_1A XX011000iiiiiiii iiiiiiiiiiittttt 1800 0000 [PC +- imm(1MB)] + // ldr Vt/Rt,[PC+simm19<<2] LS_1A XX011V00iiiiiiii iiiiiiiiiiittttt 1800 0000 [PC +- imm(1MB)] INST5(ldrsw, "ldrsw", 0,LD, IF_EN5A, 0xB9800000, 0xB9800000, 0xB8800000, 0xB8A00800, 0x98000000) // ldrsw Rt,[Xn] LS_2A 1011100110000000 000000nnnnnttttt B980 0000 -- cgit v1.2.3