diff options
author | Bruce Forstall <brucefo@microsoft.com> | 2019-04-29 18:36:59 -0700 |
---|---|---|
committer | Bruce Forstall <brucefo@microsoft.com> | 2019-05-17 14:54:20 -0700 |
commit | 54f021fbb5a6b2ff12733d7321a491dcd63aa502 (patch) | |
tree | 4dec159bc5016334590514b4ca9a02f45165293c /src | |
parent | e03923c09ea8e2eaf32d69bee4f4f7486b7d6a51 (diff) | |
download | coreclr-54f021fbb5a6b2ff12733d7321a491dcd63aa502.tar.gz coreclr-54f021fbb5a6b2ff12733d7321a491dcd63aa502.tar.bz2 coreclr-54f021fbb5a6b2ff12733d7321a491dcd63aa502.zip |
Improve stack probing for Linux/arm64
Avoid the case where we miss touching a page with a large
outgoing argument space plus a callee large local frame
size.
Add asserts that prolog SP adjustments before a stack touch
don't exceed the threshold we allow. This ensures that a
stack touch on the caller will be sufficient for the callee.
Fixes #23821
Diffstat (limited to 'src')
-rw-r--r-- | src/jit/codegenarm64.cpp | 2 | ||||
-rw-r--r-- | src/jit/codegenarmarch.cpp | 43 | ||||
-rw-r--r-- | src/jit/codegencommon.cpp | 4 | ||||
-rw-r--r-- | src/jit/target.h | 6 |
4 files changed, 51 insertions, 4 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 96222cfc87..8681aa8df6 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -634,6 +634,8 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta) { assert(spDelta <= 0); + assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); + unsigned regsToSaveCount = genCountBits(regsToSaveMask); if (regsToSaveCount == 0) { diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index 048e0e3ad3..2721bd0db2 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -3864,7 +3864,10 @@ void CodeGen::genStructReturn(GenTree* treeNode) // This is done before anything has been pushed. The previous frame might have a large outgoing argument // space that has been allocated, but the lowest addresses have not been touched. Our frame setup might // not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, -// there are always three guard pages, so we will not miss them all. +// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard +// page by default, so we need to be more careful. We do an extra probe if we might not have probed +// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this +// on Windows as well just to be consistent, even though it should not be necessary. // // On ARM32, the first instruction of the prolog is always a push (which touches the lowest address // of the stack), either of the LR register or of some argument registers, e.g., in the case of @@ -3884,19 +3887,32 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni const target_size_t pageSize = compiler->eeGetPageSize(); +#ifdef _TARGET_ARM64_ + // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and + // if the next action on the stack might subtract from SP first, before touching the current SP, then + // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does + // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64, + // but we don't alter SP. + target_size_t lastTouchDelta = 0; +#endif // _TARGET_ARM64_ + assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); if (frameSize < pageSize) { #ifdef _TARGET_ARM_ - // Frame size is (0x0000..0x1000). No probing necessary. inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE); #endif // _TARGET_ARM_ + +#ifdef _TARGET_ARM64_ + lastTouchDelta = frameSize; +#endif // _TARGET_ARM64_ } else if (frameSize < compiler->getVeryLargeFrameSize()) { #if defined(_TARGET_ARM64_) regNumber rTemp = REG_ZR; // We don't need a register for the target of the dummy load + lastTouchDelta = frameSize; #else regNumber rTemp = initReg; #endif @@ -3911,9 +3927,14 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, initReg); regSet.verifyRegUsed(initReg); *pInitRegZeroed = false; // The initReg does not contain zero + +#ifdef _TARGET_ARM64_ + lastTouchDelta -= pageSize; +#endif // _TARGET_ARM64_ } #ifdef _TARGET_ARM64_ + assert(lastTouchDelta == frameSize % pageSize); compiler->unwindPadding(); #else // !_TARGET_ARM64_ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize); @@ -3923,7 +3944,6 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni } else { - // Frame size >= 0x3000 assert(frameSize >= compiler->getVeryLargeFrameSize()); // Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change @@ -4004,11 +4024,28 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni compiler->unwindPadding(); +#ifdef _TARGET_ARM64_ + lastTouchDelta = frameSize % pageSize; +#endif // _TARGET_ARM64_ + #ifdef _TARGET_ARM_ inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL); #endif // _TARGET_ARM_ } +#ifdef _TARGET_ARM64_ + if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) + { + assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize); + instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)frameSize); + getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg); + compiler->unwindPadding(); + + regSet.verifyRegUsed(initReg); + *pInitRegZeroed = false; // The initReg does not contain zero + } +#endif // _TARGET_ARM64_ + #ifdef _TARGET_ARM_ compiler->unwindAllocStack(frameSize); #ifdef USING_SCOPE_INFO diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 1495356123..6d0a7ebf12 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -4964,6 +4964,8 @@ void CodeGen::genPushCalleeSavedRegisters() frameType = 1; + assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); + getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize, INS_OPTS_PRE_INDEX); compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize); @@ -5010,6 +5012,8 @@ void CodeGen::genPushCalleeSavedRegisters() // sub sp,sp,#framesz // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496. + assert(totalFrameSize - compiler->lvaOutgoingArgSpaceSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES); + getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize); compiler->unwindAllocStack(totalFrameSize); diff --git a/src/jit/target.h b/src/jit/target.h index b7ae79b4f7..99d7d246d3 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -1548,7 +1548,11 @@ typedef unsigned char regNumberSmall; #define JMP_SIZE_SMALL (4) - #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 504 + // The number of bytes from the end the last probed page that must also be probed, to allow for some + // small SP adjustments without probes. If zero, then the stack pointer can point to the last byte/word + // on the stack guard page, and must be touched before any further "SUB SP". + // For arm64, this is the maximum prolog establishment pre-indexed (that is SP pre-decrement) offset. + #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 512 #else #error Unsupported or unset target architecture |