summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorBruce Forstall <brucefo@microsoft.com>2019-04-29 18:36:59 -0700
committerBruce Forstall <brucefo@microsoft.com>2019-05-17 14:54:20 -0700
commit54f021fbb5a6b2ff12733d7321a491dcd63aa502 (patch)
tree4dec159bc5016334590514b4ca9a02f45165293c /src
parente03923c09ea8e2eaf32d69bee4f4f7486b7d6a51 (diff)
downloadcoreclr-54f021fbb5a6b2ff12733d7321a491dcd63aa502.tar.gz
coreclr-54f021fbb5a6b2ff12733d7321a491dcd63aa502.tar.bz2
coreclr-54f021fbb5a6b2ff12733d7321a491dcd63aa502.zip
Improve stack probing for Linux/arm64
Avoid the case where we miss touching a page with a large outgoing argument space plus a callee large local frame size. Add asserts that prolog SP adjustments before a stack touch don't exceed the threshold we allow. This ensures that a stack touch on the caller will be sufficient for the callee. Fixes #23821
Diffstat (limited to 'src')
-rw-r--r--src/jit/codegenarm64.cpp2
-rw-r--r--src/jit/codegenarmarch.cpp43
-rw-r--r--src/jit/codegencommon.cpp4
-rw-r--r--src/jit/target.h6
4 files changed, 51 insertions, 4 deletions
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 96222cfc87..8681aa8df6 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -634,6 +634,8 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, i
void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
{
assert(spDelta <= 0);
+ assert(-spDelta <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES);
+
unsigned regsToSaveCount = genCountBits(regsToSaveMask);
if (regsToSaveCount == 0)
{
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index 048e0e3ad3..2721bd0db2 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -3864,7 +3864,10 @@ void CodeGen::genStructReturn(GenTree* treeNode)
// This is done before anything has been pushed. The previous frame might have a large outgoing argument
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
-// there are always three guard pages, so we will not miss them all.
+// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
+// page by default, so we need to be more careful. We do an extra probe if we might not have probed
+// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
+// on Windows as well just to be consistent, even though it should not be necessary.
//
// On ARM32, the first instruction of the prolog is always a push (which touches the lowest address
// of the stack), either of the LR register or of some argument registers, e.g., in the case of
@@ -3884,19 +3887,32 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
const target_size_t pageSize = compiler->eeGetPageSize();
+#ifdef _TARGET_ARM64_
+ // What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
+ // if the next action on the stack might subtract from SP first, before touching the current SP, then
+ // we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
+ // a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
+ // but we don't alter SP.
+ target_size_t lastTouchDelta = 0;
+#endif // _TARGET_ARM64_
+
assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));
if (frameSize < pageSize)
{
#ifdef _TARGET_ARM_
- // Frame size is (0x0000..0x1000). No probing necessary.
inst_RV_IV(INS_sub, REG_SPBASE, frameSize, EA_PTRSIZE);
#endif // _TARGET_ARM_
+
+#ifdef _TARGET_ARM64_
+ lastTouchDelta = frameSize;
+#endif // _TARGET_ARM64_
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
#if defined(_TARGET_ARM64_)
regNumber rTemp = REG_ZR; // We don't need a register for the target of the dummy load
+ lastTouchDelta = frameSize;
#else
regNumber rTemp = initReg;
#endif
@@ -3911,9 +3927,14 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, rTemp, REG_SPBASE, initReg);
regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero
+
+#ifdef _TARGET_ARM64_
+ lastTouchDelta -= pageSize;
+#endif // _TARGET_ARM64_
}
#ifdef _TARGET_ARM64_
+ assert(lastTouchDelta == frameSize % pageSize);
compiler->unwindPadding();
#else // !_TARGET_ARM64_
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
@@ -3923,7 +3944,6 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
}
else
{
- // Frame size >= 0x3000
assert(frameSize >= compiler->getVeryLargeFrameSize());
// Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
@@ -4004,11 +4024,28 @@ void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pIni
compiler->unwindPadding();
+#ifdef _TARGET_ARM64_
+ lastTouchDelta = frameSize % pageSize;
+#endif // _TARGET_ARM64_
+
#ifdef _TARGET_ARM_
inst_RV_RV(INS_add, REG_SPBASE, rLimit, TYP_I_IMPL);
#endif // _TARGET_ARM_
}
+#ifdef _TARGET_ARM64_
+ if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
+ {
+ assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
+ instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
+ getEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
+ compiler->unwindPadding();
+
+ regSet.verifyRegUsed(initReg);
+ *pInitRegZeroed = false; // The initReg does not contain zero
+ }
+#endif // _TARGET_ARM64_
+
#ifdef _TARGET_ARM_
compiler->unwindAllocStack(frameSize);
#ifdef USING_SCOPE_INFO
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 1495356123..6d0a7ebf12 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -4964,6 +4964,8 @@ void CodeGen::genPushCalleeSavedRegisters()
frameType = 1;
+ assert(totalFrameSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES);
+
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, -totalFrameSize);
@@ -5010,6 +5012,8 @@ void CodeGen::genPushCalleeSavedRegisters()
// sub sp,sp,#framesz
// stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
+ assert(totalFrameSize - compiler->lvaOutgoingArgSpaceSize <= STACK_PROBE_BOUNDARY_THRESHOLD_BYTES);
+
getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
compiler->unwindAllocStack(totalFrameSize);
diff --git a/src/jit/target.h b/src/jit/target.h
index b7ae79b4f7..99d7d246d3 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -1548,7 +1548,11 @@ typedef unsigned char regNumberSmall;
#define JMP_SIZE_SMALL (4)
- #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 504
+ // The number of bytes from the end the last probed page that must also be probed, to allow for some
+ // small SP adjustments without probes. If zero, then the stack pointer can point to the last byte/word
+ // on the stack guard page, and must be touched before any further "SUB SP".
+ // For arm64, this is the maximum prolog establishment pre-indexed (that is SP pre-decrement) offset.
+ #define STACK_PROBE_BOUNDARY_THRESHOLD_BYTES 512
#else
#error Unsupported or unset target architecture