summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Forstall <brucefo@microsoft.com>2019-02-07 15:19:06 -0800
committerGitHub <noreply@github.com>2019-02-07 15:19:06 -0800
commitaa8e508302816656477d2ba4a1ec691dfb7af9b1 (patch)
treed0d22f5ed9d13d28ce5e61d3e692a039c0c4a5ce
parent34d50b056150ba9101ee977dded9cee3c663fab6 (diff)
parentc4ac1bc112d927de89e00810ea0fff39dc92f9fe (diff)
downloadcoreclr-aa8e508302816656477d2ba4a1ec691dfb7af9b1.tar.gz
coreclr-aa8e508302816656477d2ba4a1ec691dfb7af9b1.tar.bz2
coreclr-aa8e508302816656477d2ba4a1ec691dfb7af9b1.zip
Merge pull request #22023 from BruceForstall/FixGSWithLocalloc
Fix ARM64 GS with localloc
-rw-r--r--src/jit/codegen.h18
-rw-r--r--src/jit/codegenarm64.cpp601
-rw-r--r--src/jit/codegencommon.cpp496
-rw-r--r--src/jit/codegeninterface.h5
-rw-r--r--src/jit/compiler.cpp28
-rw-r--r--src/jit/compiler.h9
-rw-r--r--src/jit/jitconfigvalues.h31
-rw-r--r--src/jit/lclvars.cpp296
-rw-r--r--src/jit/utils.cpp188
-rw-r--r--src/jit/utils.h54
10 files changed, 1302 insertions, 424 deletions
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index 4db09e4f06..c1fb7b451d 100644
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -279,10 +279,6 @@ protected:
void genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, regNumber tmpReg, bool* pTmpRegIsZero);
-#ifdef DEBUG
- static void genCheckSPOffset(bool isRegsCountOdd, int spOffset, int slotSize);
-#endif // DEBUG
-
// A simple struct to keep register pairs for prolog and epilog.
struct RegPair
{
@@ -305,12 +301,8 @@ protected:
static int genGetSlotSizeForRegsInMask(regMaskTP regsMask);
- void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask,
- int spDelta,
- int spOffset DEBUGARG(bool isRegsToSaveCountOdd));
- void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask,
- int spDelta,
- int spOffset DEBUGARG(bool isRegsToRestoreCountOdd));
+ void genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset);
+ void genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset);
void genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta);
void genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, int lowestCalleeSavedOffset, int spDelta);
@@ -515,6 +507,12 @@ protected:
void genAmd64EmitterUnitTests();
#endif
+#ifdef _TARGET_ARM64_
+ virtual void SetSaveFpLrWithAllCalleeSavedRegisters(bool value);
+ virtual bool IsSaveFpLrWithAllCalleeSavedRegisters();
+ bool genSaveFpLrWithAllCalleeSavedRegisters;
+#endif // _TARGET_ARM64_
+
//-------------------------------------------------------------------------
//
// End prolog/epilog generation
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 1e71256389..a9caf107a1 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -392,33 +392,6 @@ void CodeGen::genEpilogRestoreReg(regNumber reg1, int spOffset, int spDelta, reg
}
}
-#ifdef DEBUG
-//------------------------------------------------------------------------
-// genCheckSPOffset: Check Stack Pointer(SP) offset value,
-// it must be 8 to account for alignment for the odd count
-// or it must be 0 for the even count.
-//
-// Arguments:
-// isRegsCountOdd - true if number of registers to save/restore is odd;
-// spOffset - stack pointer offset value;
-// slotSize - stack slot size in bytes.
-//
-// static
-void CodeGen::genCheckSPOffset(bool isRegsCountOdd, int spOffset, int slotSize)
-{
- if (isRegsCountOdd)
- {
- // The offset must be 8 to account for alignment for the odd count.
- assert(spOffset == slotSize);
- }
- else
- {
- // The offset must be 0 for the even count.
- assert(spOffset == 0);
- }
-}
-#endif // DEBUG
-
//------------------------------------------------------------------------
// genBuildRegPairsStack: Build a stack of register pairs for prolog/epilog save/restore for the given mask.
// The first register pair will contain the lowest register. Register pairs will combine neighbor
@@ -454,15 +427,24 @@ void CodeGen::genBuildRegPairsStack(regMaskTP regsMask, ArrayStack<RegPair>* reg
regNumber reg2 = genRegNumFromMask(reg2Mask);
if (reg2 == REG_NEXT(reg1))
{
- // Both registers must have the same type to be saved as pair.
- if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2))
+ // The JIT doesn't allow saving pair (R28,FP), even though the
+ // save_regp register pair unwind code specification allows it.
+ // The JIT always saves (FP,LR) as a pair, and uses the save_fplr
+ // unwind code. This only comes up in stress mode scenarios
+ // where callee-saved registers are not allocated completely
+ // from lowest-to-highest, without gaps.
+ if (reg1 != REG_R28)
{
- isPairSave = true;
+ // Both registers must have the same type to be saved as pair.
+ if (genIsValidFloatReg(reg1) == genIsValidFloatReg(reg2))
+ {
+ isPairSave = true;
- regsMask &= ~reg2Mask;
- regsCount -= 1;
+ regsMask &= ~reg2Mask;
+ regsCount -= 1;
- regStack->Push(RegPair(reg1, reg2));
+ regStack->Push(RegPair(reg1, reg2));
+ }
}
}
}
@@ -531,7 +513,7 @@ void CodeGen::genSetUseSaveNextPairs(ArrayStack<RegPair>* regStack)
// static
int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
{
- assert((regsMask & (RBM_CALLEE_SAVED | RBM_LR)) == regsMask); // Do not expect anything else.
+ assert((regsMask & (RBM_CALLEE_SAVED | RBM_FP | RBM_LR)) == regsMask); // Do not expect anything else.
static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
return REGSIZE_BYTES;
@@ -544,21 +526,11 @@ int CodeGen::genGetSlotSizeForRegsInMask(regMaskTP regsMask)
// regsMask - a mask of registers for prolog generation;
// spDelta - if non-zero, the amount to add to SP before the first register save (or together with it);
// spOffset - the offset from SP that is the beginning of the callee-saved register area;
-// isRegsToSaveCountOdd - (DEBUG only) true if number of registers to save is odd.
//
-void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask,
- int spDelta,
- int spOffset DEBUGARG(bool isRegsToSaveCountOdd))
+void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
{
const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
-#ifdef DEBUG
- if (spDelta != 0) // The first store change SP offset, check its value before.
- {
- genCheckSPOffset(isRegsToSaveCountOdd, spOffset, slotSize);
- }
-#endif // DEBUG
-
ArrayStack<RegPair> regStack(compiler->getAllocator(CMK_Codegen));
genBuildRegPairsStack(regsMask, &regStack);
@@ -586,14 +558,23 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask,
//------------------------------------------------------------------------
// genSaveCalleeSavedRegistersHelp: Save the callee-saved registers in 'regsToSaveMask' to the stack frame
-// in the function or funclet prolog. The save set does not contain FP, since that is
-// guaranteed to be saved separately, so we can set up chaining. We can only use the instructions
-// that are allowed by the unwind codes. Integer registers are stored at lower addresses,
-// FP/SIMD registers are stored at higher addresses. The caller ensures that
+// in the function or funclet prolog. Registers are saved in register number order from low addresses
+// to high addresses. This means that integer registers are saved at lower addresses than floating-point/SIMD
+// registers. However, when genSaveFpLrWithAllCalleeSavedRegisters is true, the integer registers are stored
+// at higher addresses than floating-point/SIMD registers, that is, the relative order of these two classes
+// is reversed. This is done to put the saved frame pointer very high in the frame, for simplicity.
+//
+// TODO: We could always put integer registers at the higher addresses, if desired, to remove this special
+// case. It would cause many asm diffs when first implemented.
+//
+// If establishing frame pointer chaining, it must be done after saving the callee-saved registers.
+//
+// We can only use the instructions that are allowed by the unwind codes. The caller ensures that
// there is enough space on the frame to store these registers, and that the store instructions
-// we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to
-// use. The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction. Note
-// that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
+// we need to use (STR or STP) are encodable with the stack-pointer immediate offsets we need to use.
+//
+// The caller can tell us to fold in a stack pointer adjustment, which we will do with the first instruction.
+// Note that the stack pointer adjustment must be by a multiple of 16 to preserve the invariant that the
// stack pointer is always 16 byte aligned. If we are saving an odd number of callee-saved
// registers, though, we will have an empty alignment slot somewhere. It turns out we will put
// it below (at a lower address) the callee-saved registers, as that is currently how we
@@ -609,7 +590,7 @@ void CodeGen::genSaveCalleeSavedRegisterGroup(regMaskTP regsMask,
// zero).
//
// Notes:
-// the save set can contain LR in which case LR is saved along with the other callee-saved registers.
+// The save set can contain LR in which case LR is saved along with the other callee-saved registers.
// But currently Jit doesn't use frames without frame pointer on arm64.
//
void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowestCalleeSavedOffset, int spDelta)
@@ -628,18 +609,33 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
}
assert((spDelta % 16) == 0);
- assert((regsToSaveMask & RBM_FP) == 0); // We never save FP here.
- // We also save LR, even though it is not in RBM_CALLEE_SAVED.
- assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR));
+ // We also can save FP and LR, even though they are not in RBM_CALLEE_SAVED.
+ assert(regsToSaveCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_LR));
-#ifdef DEBUG
- bool isRegsToSaveCountOdd = ((regsToSaveCount % 2) != 0);
-#endif // DEBUG
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ // TODO: always save int regs higher than float, to be consistent?
+ regMaskTP maskSaveRegsFloat = regsToSaveMask & RBM_ALLFLOAT;
+ regMaskTP maskSaveRegsInt = regsToSaveMask & ~maskSaveRegsFloat;
- int spOffset = lowestCalleeSavedOffset; // this is the offset *after* we change SP.
+ if (maskSaveRegsFloat != RBM_NONE)
+ {
+ genSaveCalleeSavedRegisterGroup(maskSaveRegsFloat, spDelta, lowestCalleeSavedOffset);
+ spDelta = 0;
+ lowestCalleeSavedOffset += genCountBits(maskSaveRegsFloat) * FPSAVE_REGSIZE_BYTES;
+ }
- genSaveCalleeSavedRegisterGroup(regsToSaveMask, spDelta, spOffset DEBUGARG(isRegsToSaveCountOdd));
+ if (maskSaveRegsInt != RBM_NONE)
+ {
+ genSaveCalleeSavedRegisterGroup(maskSaveRegsInt, spDelta, lowestCalleeSavedOffset);
+ // No need to update spDelta, lowestCalleeSavedOffset since they're not used after this.
+ }
+ }
+ else
+ {
+ genSaveCalleeSavedRegisterGroup(regsToSaveMask, spDelta, lowestCalleeSavedOffset);
+ }
}
//------------------------------------------------------------------------
@@ -650,9 +646,7 @@ void CodeGen::genSaveCalleeSavedRegistersHelp(regMaskTP regsToSaveMask, int lowe
// spDelta - if non-zero, the amount to add to SP after the last register restore (or together with it);
// spOffset - the offset from SP that is the beginning of the callee-saved register area;
//
-void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask,
- int spDelta,
- int spOffset DEBUGARG(bool isRegsToRestoreCountOdd))
+void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask, int spDelta, int spOffset)
{
const int slotSize = genGetSlotSizeForRegsInMask(regsMask);
@@ -685,13 +679,6 @@ void CodeGen::genRestoreCalleeSavedRegisterGroup(regMaskTP regsMask,
genEpilogRestoreReg(regPair.reg1, spOffset, stackDelta, REG_IP1, nullptr);
}
}
-
-#ifdef DEBUG
- if (stackDelta != 0) // The last restore (the first save) changes SP offset, check its value after.
- {
- genCheckSPOffset(isRegsToRestoreCountOdd, spOffset, slotSize);
- }
-#endif // DEBUG
}
//------------------------------------------------------------------------
@@ -741,21 +728,40 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
}
assert((spDelta % 16) == 0);
- assert((regsToRestoreMask & RBM_FP) == 0); // We never restore FP here.
- // We also restore LR, even though it is not in RBM_CALLEE_SAVED.
- assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_LR));
+ // We also can restore FP and LR, even though they are not in RBM_CALLEE_SAVED.
+ assert(regsToRestoreCount <= genCountBits(RBM_CALLEE_SAVED | RBM_FP | RBM_LR));
-#ifdef DEBUG
- bool isRegsToRestoreCountOdd = ((regsToRestoreCount % 2) != 0);
-#endif // DEBUG
+ // Point past the end, to start. We predecrement to find the offset to load from.
+ static_assert_no_msg(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
+ int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES;
+
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ // TODO: always save int regs higher than float, to be consistent?
+ regMaskTP maskRestoreRegsFloat = regsToRestoreMask & RBM_ALLFLOAT;
+ regMaskTP maskRestoreRegsInt = regsToRestoreMask & ~maskRestoreRegsFloat;
- assert(REGSIZE_BYTES == FPSAVE_REGSIZE_BYTES);
- int spOffset = lowestCalleeSavedOffset + regsToRestoreCount * REGSIZE_BYTES; // Point past the end, to start. We
- // predecrement to find the offset to
- // load from.
+ // Restore in the opposite order of saving.
+
+ if (maskRestoreRegsInt != RBM_NONE)
+ {
+ int spIntDelta = (maskRestoreRegsFloat != RBM_NONE) ? 0 : spDelta; // should we delay the SP adjustment?
+ genRestoreCalleeSavedRegisterGroup(maskRestoreRegsInt, spIntDelta, spOffset);
+ spOffset -= genCountBits(maskRestoreRegsInt) * REGSIZE_BYTES;
+ }
- genRestoreCalleeSavedRegisterGroup(regsToRestoreMask, spDelta, spOffset DEBUGARG(isRegsToRestoreCountOdd));
+ if (maskRestoreRegsFloat != RBM_NONE)
+ {
+ // If there is any spDelta, it must be used here.
+ genRestoreCalleeSavedRegisterGroup(maskRestoreRegsFloat, spDelta, spOffset);
+ // No need to update spOffset since it's not used after this.
+ }
+ }
+ else
+ {
+ genRestoreCalleeSavedRegisterGroup(regsToRestoreMask, spDelta, spOffset);
+ }
}
// clang-format off
@@ -780,16 +786,17 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
*
* Frame type 1:
* For #outsz == 0 and #framesz <= 512:
- * stp fp,lr,[sp,-#framesz]! ; establish the frame, save FP/LR
+ * stp fp,lr,[sp,-#framesz]! ; establish the frame (predecrement by #framesz), save FP/LR
* stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
*
* The funclet frame is thus:
*
* | |
* |-----------------------|
- * | incoming |
- * | arguments |
+ * | incoming arguments |
* +=======================+ <---- Caller's SP
+ * | Varargs regs space | // Only for varargs main functions; 64 bytes
+ * |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in CoreRT ABI)
@@ -798,9 +805,9 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | Saved FP, LR | // 16 bytes
* |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
* Frame type 2:
@@ -813,9 +820,10 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
*
* | |
* |-----------------------|
- * | incoming |
- * | arguments |
+ * | incoming arguments |
* +=======================+ <---- Caller's SP
+ * | Varargs regs space | // Only for varargs main functions; 64 bytes
+ * |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in CoreRT ABI)
@@ -826,14 +834,15 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | Outgoing arg space | // multiple of 8 bytes
* |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
* Frame type 3:
* For #framesz > 512:
- * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR: note that it is guaranteed here that (#framesz - #outsz) <= 168
+ * stp fp,lr,[sp,- (#framesz - #outsz)]! ; establish the frame, save FP/LR
+ * ; note that it is guaranteed here that (#framesz - #outsz) <= 240
* stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
* sub sp,sp,#outsz ; create space for outgoing argument space
*
@@ -841,9 +850,10 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
*
* | |
* |-----------------------|
- * | incoming |
- * | arguments |
+ * | incoming arguments |
* +=======================+ <---- Caller's SP
+ * | Varargs regs space | // Only for varargs main functions; 64 bytes
+ * |-----------------------|
* |Callee saved registers | // multiple of 8 bytes
* |-----------------------|
* | PSP slot | // 8 bytes (omitted in CoreRT ABI)
@@ -856,24 +866,123 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* |-----------------------|
* | Outgoing arg space | // multiple of 8 bytes
* |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
* Both #1 and #2 only change SP once. That means that there will be a maximum of one alignment slot needed. For the general case, #3,
* it is possible that we will need to add alignment to both changes to SP, leading to 16 bytes of alignment. Remember that the stack
- * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 168 bytes:
- * (1 PSP slot + 12 integer registers + 8 FP/SIMD registers) * 8 bytes. The outgoing argument size, however, can be very large, if we call a
- * function that takes a large number of arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
- * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of outgoing arguments for any call).
- * In that case, we need to 16-byte align the initial change to SP, before saving off the callee-saved registers and establishing the PSPsym,
- * so we can use the limited immediate offset encodings we have available, before doing another 16-byte aligned SP adjustment to create the
- * outgoing argument space. Both changes to SP might need to add alignment padding.
+ * pointer needs to be 16 byte aligned at all times. The size of the PSP slot plus callee-saved registers space is a maximum of 240 bytes:
+ *
+ * FP,LR registers
+ * 10 int callee-saved register x19-x28
+ * 8 float callee-saved registers v8-v15
+ * 8 saved integer argument registers x0-x7, if varargs function
+ * 1 PSP slot
+ * 1 alignment slot
+ * == 30 slots * 8 bytes = 240 bytes.
+ *
+ * The outgoing argument size, however, can be very large, if we call a function that takes a large number of
+ * arguments (note that we currently use the same outgoing argument space size in the funclet as for the main
+ * function, even if the funclet doesn't have any calls, or has a much smaller, or larger, maximum number of
+ * outgoing arguments for any call). In that case, we need to 16-byte align the initial change to SP, before
+ * saving off the callee-saved registers and establishing the PSPsym, so we can use the limited immediate offset
+ * encodings we have available, before doing another 16-byte aligned SP adjustment to create the outgoing argument
+ * space. Both changes to SP might need to add alignment padding.
+ *
+ * In addition to the above "standard" frames, we also need to support a frame where the saved FP/LR are at the
+ * highest addresses. This is to match the frame layout (specifically, callee-saved registers including FP/LR
+ * and the PSPSym) that is used in the main function when a GS cookie is required due to the use of localloc.
+ * (Note that localloc cannot be used in a funclet.) In these variants, not only has the position of FP/LR
+ * changed, but where the alignment padding is placed has also changed.
+ *
+ * Frame type 4 (variant of frame types 1 and 2):
+ * For #framesz <= 512:
+ * sub sp,sp,#framesz ; establish the frame
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ * stp fp,lr,[sp,#yyy] ; save FP/LR.
+ * ; write PSPSym
+ *
+ * The "#framesz <= 512" condition ensures that after we've established the frame, we can use "stp" with its
+ * maximum allowed offset (504) to save the callee-saved register at the highest address.
+ *
+ * We use "sub" instead of folding it into the next instruction as a predecrement, as we need to write PSPSym
+ * at the bottom of the stack, and there might also be an alignment padding slot.
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming arguments |
+ * +=======================+ <---- Caller's SP
+ * | Varargs regs space | // Only for varargs main functions; 64 bytes
+ * |-----------------------|
+ * | Saved LR | // 8 bytes
+ * |-----------------------|
+ * | Saved FP | // 8 bytes
+ * |-----------------------|
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes (optional; if #outsz > 0)
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Frame type 5 (variant of frame type 3):
+ * For #framesz > 512:
+ * sub sp,sp,(#framesz - #outsz) ; establish part of the frame. Note that it is guaranteed here that (#framesz - #outsz) <= 240
+ * stp x19,x20,[sp,#xxx] ; save callee-saved registers, as necessary
+ * stp fp,lr,[sp,#yyy] ; save FP/LR.
+ * sub sp,sp,#outsz ; create space for outgoing argument space
+ * ; write PSPSym
+ *
+ * For large frames with "#framesz > 512", we must do one SP adjustment first, after which we can save callee-saved
+ * registers with up to the maximum "stp" offset of 504. Then, we can establish the rest of the frame (namely, the
+ * space for the outgoing argument space).
+ *
+ * The funclet frame is thus:
+ *
+ * | |
+ * |-----------------------|
+ * | incoming arguments |
+ * +=======================+ <---- Caller's SP
+ * | Varargs regs space | // Only for varargs main functions; 64 bytes
+ * |-----------------------|
+ * | Saved LR | // 8 bytes
+ * |-----------------------|
+ * | Saved FP | // 8 bytes
+ * |-----------------------|
+ * |Callee saved registers | // multiple of 8 bytes
+ * |-----------------------|
+ * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the first SP subtraction 16 byte aligned
+ * |-----------------------|
+ * ~ alignment padding ~ // To make the whole frame 16 byte aligned (specifically, to 16-byte align the outgoing argument space).
+ * |-----------------------|
+ * | Outgoing arg space | // multiple of 8 bytes
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ * Note that in this case we might have 16 bytes of alignment that is adjacent. This is because we are doing 2 SP
+ * subtractions, and each one must be aligned up to 16 bytes.
*
* Note that in all cases, the PSPSym is in exactly the same position with respect to Caller-SP, and that location is the same relative to Caller-SP
* as in the main function.
*
+ * Funclets do not have varargs arguments. However, because the PSPSym must exist at the same offset from Caller-SP as in the main function, we
+ * must add buffer space for the saved varargs argument registers here, if the main function did the same.
+ *
* ; After this header, fill the PSP slot, for use by the VM (it gets reported with the GC info), or by code generation of nested filters.
* ; This is not part of the "OS prolog"; it has no associated unwind data, and is not reversed in the funclet epilog.
*
@@ -919,27 +1028,6 @@ void CodeGen::genRestoreCalleeSavedRegistersHelp(regMaskTP regsToRestoreMask, in
* ldp fp,lr,[sp],#framesz
* ret lr
*
- * The funclet frame is thus:
- *
- * | |
- * |-----------------------|
- * | incoming |
- * | arguments |
- * +=======================+ <---- Caller's SP
- * |Callee saved registers | // multiple of 8 bytes
- * |-----------------------|
- * | PSP slot | // 8 bytes (omitted in CoreRT ABI)
- * |-----------------------|
- * | Saved FP, LR | // 16 bytes
- * |-----------------------|
- * ~ alignment padding ~ // To make the whole frame 16 byte aligned.
- * |-----------------------|
- * | Outgoing arg space | // multiple of 8 bytes
- * |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
- * V
*/
// clang-format on
@@ -982,14 +1070,14 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
maskArgRegsLiveIn = RBM_R0;
}
- int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
-
if (genFuncletInfo.fiFrameType == 1)
{
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
+ maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
+
assert(genFuncletInfo.fiSpDelta2 == 0);
assert(genFuncletInfo.fiSP_to_FPLR_save_delta == 0);
}
@@ -1007,21 +1095,40 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
genFuncletInfo.fiSP_to_FPLR_save_delta);
compiler->unwindSaveRegPair(REG_FP, REG_LR, genFuncletInfo.fiSP_to_FPLR_save_delta);
+
+ maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
}
- else
+ else if (genFuncletInfo.fiFrameType == 3)
{
- assert(genFuncletInfo.fiFrameType == 3);
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, genFuncletInfo.fiSpDelta1,
INS_OPTS_PRE_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
- lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet.
+ maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
+ }
+ else if (genFuncletInfo.fiFrameType == 4)
+ {
+ // fiFrameType==4 constraints:
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -512);
+
+ // generate sub SP,SP,imm
+ genStackPointerAdjustment(genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 5);
+
+ // Nothing to do here; the first SP adjustment will be done by saving the callee-saved registers.
}
- maskSaveRegsInt &= ~(RBM_LR | RBM_FP); // We've saved these now
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta +
+ genFuncletInfo.fiSpDelta2; // We haven't done the second adjustment of SP yet (if any)
genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, lowestCalleeSavedOffset, 0);
- if (genFuncletInfo.fiFrameType == 3)
+ if ((genFuncletInfo.fiFrameType == 3) || (genFuncletInfo.fiFrameType == 5))
{
// Note that genFuncletInfo.fiSpDelta2 is always a negative value
assert(genFuncletInfo.fiSpDelta2 < 0);
@@ -1033,43 +1140,42 @@ void CodeGen::genFuncletProlog(BasicBlock* block)
// This is the end of the OS-reported prolog for purposes of unwinding
compiler->unwindEndProlog();
- // If there is no PSPSym (CoreRT ABI), we are done.
- if (compiler->lvaPSPSym == BAD_VAR_NUM)
- {
- return;
- }
-
- if (isFilter)
- {
- // This is the first block of a filter
- // Note that register x1 = CallerSP of the containing function
- // X1 is overwritten by the first Load (new callerSP)
- // X2 is scratch when we have a large constant offset
-
- // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or function)
- genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
- genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
- regSet.verifyRegUsed(REG_R1);
-
- // Store the PSP value (aka CallerSP)
- genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
- genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
-
- // re-establish the frame pointer
- genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1, genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
- REG_R2, false);
- }
- else // This is a non-filter funclet
+ // If there is no PSPSym (CoreRT ABI), we are done. Otherwise, we need to set up the PSPSym in the funclet frame.
+ if (compiler->lvaPSPSym != BAD_VAR_NUM)
{
- // X3 is scratch, X2 can also become scratch
+ if (isFilter)
+ {
+ // This is the first block of a filter
+ // Note that register x1 = CallerSP of the containing function
+ // X1 is overwritten by the first Load (new callerSP)
+ // X2 is scratch when we have a large constant offset
+
+ // Load the CallerSP of the main function (stored in the PSP of the dynamically containing funclet or
+ // function)
+ genInstrWithConstant(ins_Load(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_R1,
+ genFuncletInfo.fiCallerSP_to_PSP_slot_delta, REG_R2, false);
+ regSet.verifyRegUsed(REG_R1);
+
+ // Store the PSP value (aka CallerSP)
+ genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R1, REG_SPBASE,
+ genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+
+ // re-establish the frame pointer
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_FPBASE, REG_R1,
+ genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false);
+ }
+ else // This is a non-filter funclet
+ {
+ // X3 is scratch, X2 can also become scratch
- // compute the CallerSP, given the frame pointer. x3 is scratch.
- genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE, -genFuncletInfo.fiFunction_CallerSP_to_FP_delta,
- REG_R2, false);
- regSet.verifyRegUsed(REG_R3);
+ // compute the CallerSP, given the frame pointer. x3 is scratch.
+ genInstrWithConstant(INS_add, EA_PTRSIZE, REG_R3, REG_FPBASE,
+ -genFuncletInfo.fiFunction_CallerSP_to_FP_delta, REG_R2, false);
+ regSet.verifyRegUsed(REG_R3);
- genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
- genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+ genInstrWithConstant(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_R3, REG_SPBASE,
+ genFuncletInfo.fiSP_to_PSP_slot_delta, REG_R2, false);
+ }
}
}
@@ -1103,22 +1209,21 @@ void CodeGen::genFuncletEpilog()
assert((maskRestoreRegsInt & RBM_LR) != 0);
assert((maskRestoreRegsInt & RBM_FP) != 0);
- maskRestoreRegsInt &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
-
- int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta;
-
- if (genFuncletInfo.fiFrameType == 3)
+ if ((genFuncletInfo.fiFrameType == 3) || (genFuncletInfo.fiFrameType == 5))
{
// Note that genFuncletInfo.fiSpDelta2 is always a negative value
assert(genFuncletInfo.fiSpDelta2 < 0);
// generate add SP,SP,imm
genStackPointerAdjustment(-genFuncletInfo.fiSpDelta2, REG_R2, nullptr);
-
- lowestCalleeSavedOffset += genFuncletInfo.fiSpDelta2;
}
regMaskTP regsToRestoreMask = maskRestoreRegsInt | maskRestoreRegsFloat;
+ if ((genFuncletInfo.fiFrameType == 1) || (genFuncletInfo.fiFrameType == 2) || (genFuncletInfo.fiFrameType == 3))
+ {
+ regsToRestoreMask &= ~(RBM_LR | RBM_FP); // We restore FP/LR at the end
+ }
+ int lowestCalleeSavedOffset = genFuncletInfo.fiSP_to_CalleeSave_delta + genFuncletInfo.fiSpDelta2;
genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, lowestCalleeSavedOffset, 0);
if (genFuncletInfo.fiFrameType == 1)
@@ -1145,14 +1250,34 @@ void CodeGen::genFuncletEpilog()
assert(genFuncletInfo.fiSpDelta2 == 0);
}
- else
+ else if (genFuncletInfo.fiFrameType == 3)
{
- assert(genFuncletInfo.fiFrameType == 3);
-
getEmitter()->emitIns_R_R_R_I(INS_ldp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -genFuncletInfo.fiSpDelta1,
INS_OPTS_POST_INDEX);
compiler->unwindSaveRegPairPreindexed(REG_FP, REG_LR, genFuncletInfo.fiSpDelta1);
}
+ else if (genFuncletInfo.fiFrameType == 4)
+ {
+ // fiFrameType==4 constraints:
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -512);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+
+ assert(genFuncletInfo.fiSpDelta2 == 0);
+ }
+ else
+ {
+ assert(genFuncletInfo.fiFrameType == 5);
+ // Same work as fiFrameType==4, but different asserts.
+
+ assert(genFuncletInfo.fiSpDelta1 < 0);
+ assert(genFuncletInfo.fiSpDelta1 >= -240);
+
+ // generate add SP,SP,imm
+ genStackPointerAdjustment(-genFuncletInfo.fiSpDelta1, REG_NA, nullptr);
+ }
inst_RV(INS_ret, REG_LR, TYP_I_IMPL);
compiler->unwindReturn(REG_LR);
@@ -1176,8 +1301,9 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
return;
assert(isFramePointerUsed());
- assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
- // finalized
+
+ // The frame size and offsets must be finalized
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT);
genFuncletInfo.fiFunction_CallerSP_to_FP_delta = genCallerSPtoFPdelta();
@@ -1207,27 +1333,44 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
int SP_to_PSP_slot_delta;
int CallerSP_to_PSP_slot_delta;
+ unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
+ unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
+ assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
+
+ unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
+ assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
+
if (maxFuncletFrameSizeAligned <= 512)
{
- unsigned funcletFrameSize = saveRegsPlusPSPSize + compiler->lvaOutgoingArgSpaceSize;
- unsigned funcletFrameSizeAligned = roundUp(funcletFrameSize, STACK_ALIGN);
- assert(funcletFrameSizeAligned <= maxFuncletFrameSizeAligned);
-
- unsigned funcletFrameAlignmentPad = funcletFrameSizeAligned - funcletFrameSize;
- assert((funcletFrameAlignmentPad == 0) || (funcletFrameAlignmentPad == REGSIZE_BYTES));
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ SP_to_FPLR_save_delta = funcletFrameSizeAligned - (2 /* FP, LR */ * REGSIZE_BYTES);
+ if (compiler->info.compIsVarArgs)
+ {
+ SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES;
+ }
- SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
- SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
- CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
+ SP_to_PSP_slot_delta = compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize;
- if (compiler->lvaOutgoingArgSpaceSize == 0)
- {
- genFuncletInfo.fiFrameType = 1;
+ genFuncletInfo.fiFrameType = 4;
}
else
{
- genFuncletInfo.fiFrameType = 2;
+ SP_to_FPLR_save_delta = compiler->lvaOutgoingArgSpaceSize;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + funcletFrameAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)(saveRegsPlusPSPSize - 2 /* FP, LR */ * REGSIZE_BYTES);
+
+ if (compiler->lvaOutgoingArgSpaceSize == 0)
+ {
+ genFuncletInfo.fiFrameType = 1;
+ }
+ else
+ {
+ genFuncletInfo.fiFrameType = 2;
+ }
}
+
genFuncletInfo.fiSpDelta1 = -(int)funcletFrameSizeAligned;
genFuncletInfo.fiSpDelta2 = 0;
@@ -1238,14 +1381,32 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
unsigned saveRegsPlusPSPAlignmentPad = saveRegsPlusPSPSizeAligned - saveRegsPlusPSPSize;
assert((saveRegsPlusPSPAlignmentPad == 0) || (saveRegsPlusPSPAlignmentPad == REGSIZE_BYTES));
- SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
- SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
- CallerSP_to_PSP_slot_delta =
- -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ SP_to_FPLR_save_delta = funcletFrameSizeAligned - (2 /* FP, LR */ * REGSIZE_BYTES);
+ if (compiler->info.compIsVarArgs)
+ {
+ SP_to_FPLR_save_delta -= MAX_REG_ARG * REGSIZE_BYTES;
+ }
+
+ SP_to_PSP_slot_delta =
+ compiler->lvaOutgoingArgSpaceSize + funcletFrameAlignmentPad + saveRegsPlusPSPAlignmentPad;
+ CallerSP_to_PSP_slot_delta = -(int)saveRegsPlusPSPSize;
+
+ genFuncletInfo.fiFrameType = 5;
+ }
+ else
+ {
+ SP_to_FPLR_save_delta = outgoingArgSpaceAligned;
+ SP_to_PSP_slot_delta = SP_to_FPLR_save_delta + 2 /* FP, LR */ * REGSIZE_BYTES + saveRegsPlusPSPAlignmentPad;
+ CallerSP_to_PSP_slot_delta =
+ -(int)(saveRegsPlusPSPSizeAligned - 2 /* FP, LR */ * REGSIZE_BYTES - saveRegsPlusPSPAlignmentPad);
+
+ genFuncletInfo.fiFrameType = 3;
+ }
- genFuncletInfo.fiFrameType = 3;
- genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
- genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
+ genFuncletInfo.fiSpDelta1 = -(int)saveRegsPlusPSPSizeAligned;
+ genFuncletInfo.fiSpDelta2 = -(int)outgoingArgSpaceAligned;
assert(genFuncletInfo.fiSpDelta1 + genFuncletInfo.fiSpDelta2 == -(int)maxFuncletFrameSizeAligned);
}
@@ -3506,12 +3667,27 @@ void CodeGen::genCodeForJumpCompare(GenTreeOp* tree)
}
}
+//---------------------------------------------------------------------
+// genSPtoFPdelta - return offset from the stack pointer (Initial-SP) to the frame pointer. The frame pointer
+// will point to the saved frame pointer slot (i.e., there will be frame pointer chaining).
+//
int CodeGenInterface::genSPtoFPdelta()
{
- int delta;
+ assert(isFramePointerUsed());
+ int delta = -1; // initialization to illegal value
- // We place the saved frame pointer immediately above the outgoing argument space.
- delta = (int)compiler->lvaOutgoingArgSpaceSize;
+ if (IsSaveFpLrWithAllCalleeSavedRegisters())
+ {
+ // The saved frame pointer is at the top of the frame, just beneath the saved varargs register space and the
+ // saved LR.
+ delta = genTotalFrameSize() - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
+ 2 /* FP, LR */ * REGSIZE_BYTES;
+ }
+ else
+ {
+ // We place the saved frame pointer immediately above the outgoing argument space.
+ delta = (int)compiler->lvaOutgoingArgSpaceSize;
+ }
assert(delta >= 0);
return delta;
@@ -3574,6 +3750,25 @@ int CodeGenInterface::genCallerSPtoInitialSPdelta()
return callerSPtoSPdelta;
}
+//---------------------------------------------------------------------
+// SetSaveFpLrWithAllCalleeSavedRegisters - Set the variable that indicates if FP/LR registers
+// are stored with the rest of the callee-saved registers.
+//
+void CodeGen::SetSaveFpLrWithAllCalleeSavedRegisters(bool value)
+{
+ JITDUMP("Setting genSaveFpLrWithAllCalleeSavedRegisters to %s\n", dspBool(value));
+ genSaveFpLrWithAllCalleeSavedRegisters = value;
+}
+
+//---------------------------------------------------------------------
+// IsSaveFpLrWithAllCalleeSavedRegisters - Return true if the FP/LR registers are stored with the rest
+// of the callee-saved registers in the prolog, false otherwise.
+//
+bool CodeGen::IsSaveFpLrWithAllCalleeSavedRegisters()
+{
+ return genSaveFpLrWithAllCalleeSavedRegisters;
+}
+
/*****************************************************************************
* Emit a call to a helper function.
*
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 214fd35c8d..1ff2871ee7 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -165,6 +165,10 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler)
genInterruptibleUsed = false;
genCurDispOffset = (unsigned)-1;
#endif
+
+#ifdef _TARGET_ARM64_
+ genSaveFpLrWithAllCalleeSavedRegisters = false;
+#endif // _TARGET_ARM64_
}
void CodeGenInterface::genMarkTreeInReg(GenTree* tree, regNumber reg)
@@ -4827,11 +4831,12 @@ void CodeGen::genPushCalleeSavedRegisters()
}
#elif defined(_TARGET_ARM64_)
// See the document "ARM64 JIT Frame Layout" and/or "ARM64 Exception Data" for more details or requirements and
- // options. Case numbers in comments here refer to this document.
+ // options. Case numbers in comments here refer to this document. See also Compiler::lvaAssignFrameOffsets()
+ // for pictures of the general frame layouts, and CodeGen::genFuncletProlog() implementations (per architecture)
+ // for pictures of the funclet frame layouts.
//
// For most frames, generate, e.g.:
- // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair. Store pair
- // // ensures stack stays aligned.
+ // stp fp, lr, [sp,-0x80]! // predecrement SP with full frame size, and store FP/LR pair.
// stp r19, r20, [sp, 0x60] // store at positive offset from SP established above, into callee-saved area
// // at top of frame (highest addresses).
// stp r21, r22, [sp, 0x70]
@@ -4843,8 +4848,67 @@ void CodeGen::genPushCalleeSavedRegisters()
// 3. General-purpose registers are 8 bytes, floating-point registers are 16 bytes, but FP/SIMD registers only
// preserve their lower 8 bytes, by calling convention.
// 4. For frames with varargs, we spill the integer register arguments to the stack, so all the arguments are
- // consecutive.
+ // consecutive, and at the top of the frame.
// 5. We allocate the frame here; no further changes to SP are allowed (except in the body, for localloc).
+ //
+ // For functions with GS and localloc, we change the frame so the frame pointer and LR are saved at the top
+ // of the frame, just under the varargs registers (if any). Note that the funclet frames must follow the same
+ // rule, and both main frame and funclet frames (if any) must put PSPSym in the same offset from Caller-SP.
+ // Since this frame type is relatively rare, we force using it via stress modes, for additional coverage.
+ //
+ // The frames look like the following (simplified to only include components that matter for establishing the
+ // frames). See also Compiler::lvaAssignFrameOffsets().
+ //
+ // Frames with FP, LR saved at bottom of frame (above outgoing argument space):
+ //
+ // | |
+ // |-----------------------|
+ // | incoming arguments |
+ // +=======================+ <---- Caller's SP
+ // | Varargs regs space | // Only for varargs functions; 64 bytes
+ // |-----------------------|
+ // |Callee saved registers | // not including FP/LR; multiple of 8 bytes
+ // |-----------------------|
+ // | PSP slot | // 8 bytes (omitted in CoreRT ABI)
+ // |-----------------------|
+ // | locals, temps, etc. |
+ // |-----------------------|
+ // | Saved LR | // 8 bytes
+ // |-----------------------|
+ // | Saved FP | // 8 bytes
+ // |-----------------------|
+ // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0)
+ // |-----------------------| <---- Ambient SP
+ // | | |
+ // ~ | Stack grows ~
+ // | | downward |
+ // V
+ //
+ // Frames with FP, LR saved at top of frame (below saved varargs incoming arguments):
+ //
+ // | |
+ // |-----------------------|
+ // | incoming arguments |
+ // +=======================+ <---- Caller's SP
+ // | Varargs regs space | // Only for varargs functions; 64 bytes
+ // |-----------------------|
+ // | Saved LR | // 8 bytes
+ // |-----------------------|
+ // | Saved FP | // 8 bytes
+ // |-----------------------|
+ // |Callee saved registers | // not including FP/LR; multiple of 8 bytes
+ // |-----------------------|
+ // | PSP slot | // 8 bytes (omitted in CoreRT ABI)
+ // |-----------------------|
+ // | locals, temps, etc. |
+ // |-----------------------|
+ // | Outgoing arg space | // multiple of 8 bytes; if required (i.e., #outsz != 0)
+ // |-----------------------| <---- Ambient SP
+ // | | |
+ // ~ | Stack grows ~
+ // | | downward |
+ // V
+ //
int totalFrameSize = genTotalFrameSize();
@@ -4853,10 +4917,25 @@ void CodeGen::genPushCalleeSavedRegisters()
regMaskTP maskSaveRegsFloat = rsPushRegs & RBM_ALLFLOAT;
regMaskTP maskSaveRegsInt = rsPushRegs & ~maskSaveRegsFloat;
- int frameType = 0; // This number is arbitrary, is defined below, and corresponds to one of the frame styles we
- // generate based on various sizes.
- int calleeSaveSPDelta = 0;
- int calleeSaveSPDeltaUnaligned = 0;
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("Save float regs: ");
+ dspRegMask(maskSaveRegsFloat);
+ printf("\n");
+ printf("Save int regs: ");
+ dspRegMask(maskSaveRegsInt);
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // The frameType number is arbitrary, is defined below, and corresponds to one of the frame styles we
+ // generate based on various sizes.
+ int frameType = 0;
+
+ // The amount to subtract from SP before starting to store the callee-saved registers. It might be folded into the
+ // first save instruction as a "predecrement" amount, if possible.
+ int calleeSaveSPDelta = 0;
if (isFramePointerUsed())
{
@@ -4865,7 +4944,27 @@ void CodeGen::genPushCalleeSavedRegisters()
assert((maskSaveRegsInt & RBM_FP) != 0);
assert((maskSaveRegsInt & RBM_LR) != 0);
- if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ // If we need to generate a GS cookie, we need to make sure the saved frame pointer and return address
+ // (FP and LR) are protected from buffer overrun by the GS cookie. If FP/LR are at the lowest addresses,
+ // then they are safe, since they are lower than any unsafe buffers. And the GS cookie we add will
+ // protect our caller's frame. If we have a localloc, however, that is dynamically placed lower than our
+ // saved FP/LR. In that case, we save FP/LR along with the rest of the callee-saved registers, above
+ // the GS cookie.
+ //
+ // After the frame is allocated, the frame pointer is established, pointing at the saved frame pointer to
+ // create a frame pointer chain.
+ //
+ // Do we need another frame pointer register to get good code quality in the case of having the frame pointer
+ // point high in the frame, so we can take advantage of arm64's preference for positive offsets? C++ native
+ // code dedicates callee-saved x19 to this, so generates:
+ // mov x19, sp
+ // in the prolog, then uses x19 for local var accesses. Given that this case is so rare, we currently do
+ // not do this. That means that negative offsets from FP might need to use the reserved register to form
+ // the local variable offset for an addressing mode.
+
+ // TODO-ARM64-Bug?: should this be "totalFrameSize <= 512"?
+ if (((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512)) &&
+ !genSaveFpLrWithAllCalleeSavedRegisters)
{
// Case #1.
//
@@ -4873,12 +4972,15 @@ void CodeGen::genPushCalleeSavedRegisters()
// stp fp,lr,[sp,#-framesz]!
//
// The (totalFrameSize < 512) condition ensures that both the predecrement
- // and the postincrement of SP can occur with STP.
+ // and the postincrement of SP can occur with STP.
//
// After saving callee-saved registers, we establish the frame pointer with:
// mov fp,sp
// We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
+ JITDUMP("Frame type 1. #outsz=0; #framesz=%d; LclFrameSize=%d\n", totalFrameSize,
+ compiler->compLclFrameSize);
+
frameType = 1;
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE, -totalFrameSize,
@@ -4892,55 +4994,77 @@ void CodeGen::genPushCalleeSavedRegisters()
{
// Case #2.
//
- // Generate:
- // sub sp,sp,#framesz
- // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
- //
- // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP with
- // signed offset encoding.
+ // The (totalFrameSize <= 512) condition ensures the callee-saved registers can all be saved using STP
+ // with signed offset encoding.
//
// After saving callee-saved registers, we establish the frame pointer with:
// add fp,sp,#outsz
// We do this *after* saving callee-saved registers, so the prolog/epilog unwind codes mostly match.
- frameType = 2;
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ JITDUMP("Frame type 4 (save FP/LR at top). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
- assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
+ frameType = 4;
- getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
- compiler->unwindAllocStack(totalFrameSize);
+ // The frame will be allocated below, when the callee-saved registers are saved. This might mean a
+ // separate SUB instruction or the SP adjustment might be folded into the first STP if there is
+ // no outgoing argument space AND no local frame space, that is, if the only thing the frame does
+ // is save callee-saved registers (and possibly varargs argument registers).
+ calleeSaveSPDelta = totalFrameSize;
- getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
- compiler->lvaOutgoingArgSpaceSize);
- compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+ offset = (int)compiler->compLclFrameSize;
+ }
+ else
+ {
+ JITDUMP("Frame type 2 (save FP/LR at bottom). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
- maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
- offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ frameType = 2;
+
+ // Generate:
+ // sub sp,sp,#framesz
+ // stp fp,lr,[sp,#outsz] // note that by necessity, #outsz <= #framesz - 16, so #outsz <= 496.
+
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, totalFrameSize);
+ compiler->unwindAllocStack(totalFrameSize);
+
+ assert(compiler->lvaOutgoingArgSpaceSize + 2 * REGSIZE_BYTES <= (unsigned)totalFrameSize);
+
+ getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_FP, REG_LR, REG_SPBASE,
+ compiler->lvaOutgoingArgSpaceSize);
+ compiler->unwindSaveRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize);
+
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR); // We've already saved FP/LR
+ offset = (int)compiler->compLclFrameSize + 2 * REGSIZE_BYTES; // 2 for FP/LR
+ }
}
else
{
// Case 5 or 6.
//
- // First, the callee-saved registers will be saved, and the callee-saved register code must use pre-index
- // to subtract from SP as the first instruction. It must also leave space for varargs registers to be
- // stored. For example:
+ // First, the callee-saved registers will be saved, and the callee-saved register code must use
+ // pre-index to subtract from SP as the first instruction. It must also leave space for varargs
+ // registers to be stored. For example:
// stp r19,r20,[sp,#-96]!
// stp d8,d9,[sp,#16]
// ... save varargs incoming integer registers ...
// Note that all SP alterations must be 16-byte aligned. We have already calculated any alignment to be
- // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate alignment).
- // So, if there is an odd number of callee-saved registers, we use (for example, with just one saved
- // register):
+ // lower on the stack than the callee-saved registers (see lvaAlignFrame() for how we calculate
+ // alignment). So, if there is an odd number of callee-saved registers, we use (for example, with just
+ // one saved register):
// sub sp,sp,#16
// str r19,[sp,#8]
// This is one additional instruction, but it centralizes the aligned space. Otherwise, it might be
// possible to have two 8-byte alignment padding words, one below the callee-saved registers, and one
// above them. If that is preferable, we could implement it.
- // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument registers.
+ //
+ // Note that any varargs saved space will always be 16-byte aligned, since there are 8 argument
+ // registers.
//
// Then, define #remainingFrameSz = #framesz - (callee-saved size + varargs space + possible alignment
- // padding from above).
- // Note that #remainingFrameSz must not be zero, since we still need to save FP,SP.
+ // padding from above). Note that #remainingFrameSz must not be zero, since we still need to save FP,LR.
//
// Generate:
// sub sp,sp,#remainingFrameSz
@@ -4952,10 +5076,10 @@ void CodeGen::genPushCalleeSavedRegisters()
// stp fp,lr,[sp,#outsz]
// add fp,sp,#outsz
//
- // However, we need to handle the case where #outsz is larger than the constant signed offset encoding can
- // handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
- // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of the
- // following sequences:
+ // However, we need to handle the case where #outsz is larger than the constant signed offset encoding
+ // can handle. And, once again, we might need to deal with #outsz that is not aligned to 16-bytes (i.e.,
+ // STACK_ALIGN). So, in the case of large #outsz we will have an additional SP adjustment, using one of
+ // the following sequences:
//
// Define #remainingFrameSz2 = #remainingFrameSz - #outsz.
//
@@ -4966,9 +5090,9 @@ void CodeGen::genPushCalleeSavedRegisters()
//
// Or:
//
- // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
- // // always guaranteed to be 8 byte aligned).
- // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
+ // sub sp,sp,roundUp(#remainingFrameSz2,16) // if #remainingFrameSz2 is not 16-byte aligned (it is
+ // // always guaranteed to be 8 byte aligned).
+ // stp fp,lr,[sp,#8] // it will always be #8 in the unaligned case
// add fp,sp,#8
// sub sp,sp,#outsz - #8
//
@@ -4976,21 +5100,47 @@ void CodeGen::genPushCalleeSavedRegisters()
// mov rX, #outsz - #8 // maybe multiple instructions
// sub sp,sp,rX
// )
+ //
+ // Note that even if we align the SP alterations, that does not imply that we are creating empty alignment
+ // slots. In fact, we are not; any empty alignment slots were calculated in
+ // Compiler::lvaAssignFrameOffsets() and its callees.
- frameType = 3;
+ int calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize;
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ JITDUMP("Frame type 5 (save FP/LR at top). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+
+ // This case is much simpler, because we allocate space for the callee-saved register area, including
+ // FP/LR. Note the SP adjustment might be SUB or be folded into the first store as a predecrement.
+ // Then, we use a single SUB to establish the rest of the frame. We need to be careful about where
+ // to establish the frame pointer, as there is a limit of 2040 bytes offset from SP to FP in the
+ // unwind codes when FP is established.
+ frameType = 5;
+ }
+ else
+ {
+ JITDUMP("Frame type 3 (save FP/LR at bottom). #outsz=%d; #framesz=%d; LclFrameSize=%d\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, compiler->compLclFrameSize);
+
+ frameType = 3;
+
+ calleeSaveSPDeltaUnaligned -= 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
+
+ // We'll take care of these later, but callee-saved regs code shouldn't see them.
+ maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
+ }
- calleeSaveSPDeltaUnaligned =
- totalFrameSize - compiler->compLclFrameSize - 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll save later.
assert(calleeSaveSPDeltaUnaligned >= 0);
assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
offset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
- assert((offset == 0) || (offset == REGSIZE_BYTES)); // At most one alignment slot between SP and where we
- // store the callee-saved registers.
- // We'll take care of these later, but callee-saved regs code shouldn't see them.
- maskSaveRegsInt &= ~(RBM_FP | RBM_LR);
+ JITDUMP(" calleeSaveSPDelta=%d, offset=%d\n", calleeSaveSPDelta, offset);
+
+ // At most one alignment slot between SP and where we store the callee-saved registers.
+ assert((offset == 0) || (offset == REGSIZE_BYTES));
}
}
else
@@ -4999,8 +5149,8 @@ void CodeGen::genPushCalleeSavedRegisters()
assert((maskSaveRegsInt & RBM_FP) == 0);
assert((maskSaveRegsInt & RBM_LR) != 0);
- // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using 'stp'
- // if we only have one callee-saved register plus LR to save.
+ // Note that there is no pre-indexed save_lrpair unwind code variant, so we can't allocate the frame using
+ // 'stp' if we only have one callee-saved register plus LR to save.
NYI("Frame without frame pointer");
offset = 0;
@@ -5008,6 +5158,7 @@ void CodeGen::genPushCalleeSavedRegisters()
assert(frameType != 0);
+ JITDUMP(" offset=%d, calleeSaveSPDelta=%d\n", offset, calleeSaveSPDelta);
genSaveCalleeSavedRegistersHelp(maskSaveRegsInt | maskSaveRegsFloat, offset, -calleeSaveSPDelta);
offset += genCountBits(maskSaveRegsInt | maskSaveRegsFloat) * REGSIZE_BYTES;
@@ -5018,6 +5169,8 @@ void CodeGen::genPushCalleeSavedRegisters()
if (compiler->info.compIsVarArgs)
{
+ JITDUMP(" compIsVarArgs=true\n");
+
// There are 8 general-purpose registers to home, thus 'offset' must be 16-byte aligned here.
assert((offset % 16) == 0);
for (regNumber reg1 = REG_ARG_FIRST; reg1 < REG_ARG_LAST; reg1 = REG_NEXT(REG_NEXT(reg1)))
@@ -5030,18 +5183,27 @@ void CodeGen::genPushCalleeSavedRegisters()
}
}
+ // By default, we'll establish the frame pointer chain. (Note that currently frames without FP are NYI.)
+ bool establishFramePointer = true;
+
+ // If we do establish the frame pointer, what is the amount we add to SP to do so?
+ unsigned offsetSpToSavedFp = 0;
+
if (frameType == 1)
{
- getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
- compiler->unwindSetFrameReg(REG_FPBASE, 0);
+ assert(!genSaveFpLrWithAllCalleeSavedRegisters);
+ assert(offsetSpToSavedFp == 0);
}
else if (frameType == 2)
{
- getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
- compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ assert(!genSaveFpLrWithAllCalleeSavedRegisters);
+
+ offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
}
else if (frameType == 3)
{
+ assert(!genSaveFpLrWithAllCalleeSavedRegisters);
+
int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
assert(remainingFrameSz > 0);
assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
@@ -5057,19 +5219,28 @@ void CodeGen::genPushCalleeSavedRegisters()
int alignmentAdjustment2 = spAdjustment2 - spAdjustment2Unaligned;
assert((alignmentAdjustment2 == 0) || (alignmentAdjustment2 == 8));
+ JITDUMP(" spAdjustment2=%d\n", spAdjustment2);
+
genPrologSaveRegPair(REG_FP, REG_LR, alignmentAdjustment2, -spAdjustment2, false, initReg, pInitRegZeroed);
offset += spAdjustment2;
- // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub" included
- // some of it)
+ // Now subtract off the #outsz (or the rest of the #outsz if it was unaligned, and the above "sub"
+ // included some of it)
int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
assert(spAdjustment3 > 0);
assert((spAdjustment3 % 16) == 0);
- getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, alignmentAdjustment2);
- compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
+ JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2);
+ genEstablishFramePointer(alignmentAdjustment2, /* reportUnwindData */ true);
+
+ // We just established the frame pointer chain; don't do it again.
+ establishFramePointer = false;
+ JITDUMP(" spAdjustment3=%d\n", spAdjustment3);
+
+ // TODO-ARM64-CQ: we're reporting this SUB SP in the unwind info. Do we need to, since we've already
+ // established the frame pointer?
genStackPointerAdjustment(-spAdjustment3, initReg, pInitRegZeroed);
offset += spAdjustment3;
}
@@ -5079,10 +5250,49 @@ void CodeGen::genPushCalleeSavedRegisters()
pInitRegZeroed);
offset += remainingFrameSz;
- getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, compiler->lvaOutgoingArgSpaceSize);
- compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ offsetSpToSavedFp = compiler->lvaOutgoingArgSpaceSize;
}
}
+ else if (frameType == 4)
+ {
+ assert(genSaveFpLrWithAllCalleeSavedRegisters);
+ offsetSpToSavedFp = calleeSaveSPDelta - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
+ 2 * REGSIZE_BYTES; // -2 for FP, LR
+ }
+ else if (frameType == 5)
+ {
+ assert(genSaveFpLrWithAllCalleeSavedRegisters);
+
+ offsetSpToSavedFp = calleeSaveSPDelta - (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
+ 2 * REGSIZE_BYTES; // -2 for FP, LR
+ JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp);
+ genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true);
+
+ // We just established the frame pointer chain; don't do it again.
+ establishFramePointer = false;
+
+ int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
+ assert(remainingFrameSz > 0);
+ assert((remainingFrameSz % 16) == 0); // this is guaranteed to be 16-byte aligned because each component --
+ // totalFrameSize and calleeSaveSPDelta -- is 16-byte aligned.
+
+ JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz);
+
+ // TODO-ARM64-CQ: we're reporting this SUB SP in the unwind info. Do we need to, since we've already
+ // established the frame pointer?
+ genStackPointerAdjustment(-remainingFrameSz, initReg, pInitRegZeroed);
+ offset += remainingFrameSz;
+ }
+ else
+ {
+ unreached();
+ }
+
+ if (establishFramePointer)
+ {
+ JITDUMP(" offsetSpToSavedFp=%d\n", offsetSpToSavedFp);
+ genEstablishFramePointer(offsetSpToSavedFp, /* reportUnwindData */ true);
+ }
assert(offset == totalFrameSize);
@@ -5499,16 +5709,20 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
int totalFrameSize = genTotalFrameSize();
- int calleeSaveSPOffset; // This will be the starting place for restoring the callee-saved registers, in decreasing
- // order.
- int frameType = 0; // An indicator of what type of frame we are popping.
- int calleeSaveSPDelta = 0;
- int calleeSaveSPDeltaUnaligned = 0;
+ int calleeSaveSPOffset = 0; // This will be the starting place for restoring the callee-saved registers, in
+ // decreasing order.
+ int frameType = 0; // An indicator of what type of frame we are popping.
+ int calleeSaveSPDelta = 0; // Amount to add to SP after callee-saved registers have been restored.
if (isFramePointerUsed())
{
- if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512))
+ // TODO-ARM64-Bug?: should this be "totalFrameSize <= 512"?
+ if ((compiler->lvaOutgoingArgSpaceSize == 0) && (totalFrameSize < 512) &&
+ !genSaveFpLrWithAllCalleeSavedRegisters)
{
+ JITDUMP("Frame type 1. #outsz=0; #framesz=%d; localloc? %s\n", totalFrameSize,
+ dspBool(compiler->compLocallocUsed));
+
frameType = 1;
if (compiler->compLocallocUsed)
{
@@ -5520,38 +5734,64 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
- // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
- // of stack.
+ // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the
+ // bottom of stack.
calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
}
else if (totalFrameSize <= 512)
{
- frameType = 2;
if (compiler->compLocallocUsed)
{
// Restore sp from fp
- // sub sp, fp, #outsz
- getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
- compiler->lvaOutgoingArgSpaceSize);
- compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ // sub sp, fp, #outsz // Uses #outsz if FP/LR stored at bottom
+ int SPtoFPdelta = genSPtoFPdelta();
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, SPtoFPdelta);
+ compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta);
}
- regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+ if (genSaveFpLrWithAllCalleeSavedRegisters)
+ {
+ JITDUMP("Frame type 4 (save FP/LR at top). #outsz=%d; #framesz=%d; localloc? %s\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize,
+ dspBool(compiler->compLocallocUsed));
- // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the bottom
- // of stack.
- calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ frameType = 4;
+
+ calleeSaveSPOffset = compiler->compLclFrameSize;
+
+ // Remove the frame after we're done restoring the callee-saved registers.
+ calleeSaveSPDelta = totalFrameSize;
+ }
+ else
+ {
+ JITDUMP("Frame type 2 (save FP/LR at bottom). #outsz=%d; #framesz=%d; localloc? %s\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize,
+ dspBool(compiler->compLocallocUsed));
+
+ frameType = 2;
+
+ regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and post-index SP.
+
+ // Compute callee save SP offset which is at the top of local frame while the FP/LR is saved at the
+ // bottom of stack.
+ calleeSaveSPOffset = compiler->compLclFrameSize + 2 * REGSIZE_BYTES;
+ }
}
- else
+ else if (!genSaveFpLrWithAllCalleeSavedRegisters)
{
+ JITDUMP("Frame type 3 (save FP/LR at bottom). #outsz=%d; #framesz=%d; localloc? %s\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed));
+
frameType = 3;
- calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
- 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
+ int calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize -
+ 2 * REGSIZE_BYTES; // 2 for FP, LR which we'll restore later.
assert(calleeSaveSPDeltaUnaligned >= 0);
assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+ JITDUMP(" calleeSaveSPDelta=%d\n", calleeSaveSPDelta);
+
regsToRestoreMask &= ~(RBM_FP | RBM_LR); // We'll restore FP/LR at the end, and (hopefully) post-index SP.
int remainingFrameSz = totalFrameSize - calleeSaveSPDelta;
@@ -5569,8 +5809,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
if (compiler->compLocallocUsed)
{
- // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp in
- // prolog.
+ // Restore sp from fp. No need to update sp after this since we've set up fp before adjusting sp
+ // in prolog.
// sub sp, fp, #alignmentAdjustment2
getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, alignmentAdjustment2);
compiler->unwindSetFrameReg(REG_FPBASE, alignmentAdjustment2);
@@ -5583,23 +5823,29 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
int spAdjustment3 = compiler->lvaOutgoingArgSpaceSize - alignmentAdjustment2;
assert(spAdjustment3 > 0);
assert((spAdjustment3 % 16) == 0);
+
+ JITDUMP(" spAdjustment3=%d\n", spAdjustment3);
+
genStackPointerAdjustment(spAdjustment3, REG_IP0, nullptr);
}
// Generate:
// ldp fp,lr,[sp]
// add sp,sp,#remainingFrameSz
+
+ JITDUMP(" alignmentAdjustment2=%d\n", alignmentAdjustment2);
genEpilogRestoreRegPair(REG_FP, REG_LR, alignmentAdjustment2, spAdjustment2, false, REG_IP1, nullptr);
}
else
{
if (compiler->compLocallocUsed)
{
- // Restore sp from fp
+ // Restore sp from fp; here that's #outsz from SP
// sub sp, fp, #outsz
- getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE,
- compiler->lvaOutgoingArgSpaceSize);
- compiler->unwindSetFrameReg(REG_FPBASE, compiler->lvaOutgoingArgSpaceSize);
+ int SPtoFPdelta = genSPtoFPdelta();
+ assert(SPtoFPdelta == compiler->lvaOutgoingArgSpaceSize);
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, SPtoFPdelta);
+ compiler->unwindSetFrameReg(REG_FPBASE, SPtoFPdelta);
}
// Generate:
@@ -5607,6 +5853,8 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
// add sp,sp,#remainingFrameSz ; might need to load this constant in a scratch register if
// ; it's large
+ JITDUMP(" remainingFrameSz=%d\n", remainingFrameSz);
+
genEpilogRestoreRegPair(REG_FP, REG_LR, compiler->lvaOutgoingArgSpaceSize, remainingFrameSz, false,
REG_IP1, nullptr);
}
@@ -5617,6 +5865,32 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
}
+ else
+ {
+ JITDUMP("Frame type 5 (save FP/LR at top). #outsz=%d; #framesz=%d; localloc? %s\n",
+ unsigned(compiler->lvaOutgoingArgSpaceSize), totalFrameSize, dspBool(compiler->compLocallocUsed));
+
+ frameType = 5;
+
+ int calleeSaveSPDeltaUnaligned = totalFrameSize - compiler->compLclFrameSize;
+ assert(calleeSaveSPDeltaUnaligned >= 0);
+ assert((calleeSaveSPDeltaUnaligned % 8) == 0); // It better at least be 8 byte aligned.
+ calleeSaveSPDelta = AlignUp((UINT)calleeSaveSPDeltaUnaligned, STACK_ALIGN);
+
+ calleeSaveSPOffset = calleeSaveSPDelta - calleeSaveSPDeltaUnaligned;
+ assert((calleeSaveSPOffset == 0) || (calleeSaveSPOffset == REGSIZE_BYTES));
+
+ // Restore sp from fp:
+ // sub sp, fp, #sp-to-fp-delta
+ // This is the same whether there is localloc or not. Note that we don't need to do anything to remove the
+ // "remainingFrameSz" to reverse the SUB of that amount in the prolog. The unwind codes won't match.
+
+ int offsetSpToSavedFp = calleeSaveSPDelta -
+ (compiler->info.compIsVarArgs ? MAX_REG_ARG * REGSIZE_BYTES : 0) -
+ 2 * REGSIZE_BYTES; // -2 for FP, LR
+ getEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_FPBASE, offsetSpToSavedFp);
+ compiler->unwindSetFrameReg(REG_FPBASE, offsetSpToSavedFp);
+ }
}
else
{
@@ -5625,6 +5899,7 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
calleeSaveSPOffset = 0;
}
+ JITDUMP(" calleeSaveSPOffset=%d, calleeSaveSPDelta=%d\n", calleeSaveSPOffset, calleeSaveSPDelta);
genRestoreCalleeSavedRegistersHelp(regsToRestoreMask, calleeSaveSPOffset, calleeSaveSPDelta);
if (frameType == 1)
@@ -5653,6 +5928,14 @@ void CodeGen::genPopCalleeSavedRegistersAndFreeLclFrame(bool jmpEpilog)
{
// Nothing to do after restoring callee-saved registers.
}
+ else if (frameType == 4)
+ {
+ // Nothing to do after restoring callee-saved registers.
+ }
+ else if (frameType == 5)
+ {
+ // Nothing to do after restoring callee-saved registers.
+ }
else
{
unreached();
@@ -5858,8 +6141,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
regMask = genFindLowestBit(availMask);
rZero2 = genRegNumFromMask(regMask);
availMask &= ~regMask;
- assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) ==
- 0); // rZero2 is not a live incoming argument reg
+ assert((genRegMask(rZero2) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rZero2 is not a live incoming
+ // argument reg
// We pick the next lowest register number for rAddr
noway_assert(availMask != RBM_NONE);
@@ -5918,8 +6201,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
if (useLoop)
{
noway_assert(uCntSlots >= 2);
- assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) ==
- 0); // rCnt is not a live incoming argument reg
+ assert((genRegMask(rCnt) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // rCnt is not a live incoming
+ // argument reg
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rCnt, (ssize_t)uCntSlots / 2);
}
@@ -6053,8 +6336,8 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
}
else if (genInitStkLclCnt > 0)
{
- assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) ==
- 0); // initReg is not a live incoming argument reg
+ assert((genRegMask(initReg) & intRegState.rsCalleeRegArgMaskLiveIn) == 0); // initReg is not a live incoming
+ // argument reg
/* Initialize any lvMustInit vars on the stack */
@@ -7349,6 +7632,22 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
compiler->unwindPadding();
}
+#elif defined(_TARGET_ARM64_)
+
+ if (delta == 0)
+ {
+ getEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_FPBASE, REG_SPBASE);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(INS_add, EA_PTRSIZE, REG_FPBASE, REG_SPBASE, delta);
+ }
+
+ if (reportUnwindData)
+ {
+ compiler->unwindSetFrameReg(REG_FPBASE, delta);
+ }
+
#else
NYI("establish frame pointer");
#endif
@@ -9034,8 +9333,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
if (compiler->ehAnyFunclets())
{
assert(isFramePointerUsed());
- assert(compiler->lvaDoneFrameLayout ==
- Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
+ // finalized
// Frame pointer doesn't point at the end, it points at the pushed r11. So, instead
// of adding the number of callee-saved regs to CallerSP, we add 1 for lr and 1 for r11
@@ -9080,10 +9379,11 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
printf(" PSP slot SP offset: %d\n", genFuncletInfo.fiPSP_slot_SP_offset);
printf(" PSP slot Caller SP offset: %d\n", genFuncletInfo.fiPSP_slot_CallerSP_offset);
- if (PSP_slot_CallerSP_offset !=
- compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
+ if (PSP_slot_CallerSP_offset != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym))
+ {
printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
+ }
}
#endif // DEBUG
@@ -9299,8 +9599,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
// because we're not going to allocate the same size frame as the parent.
assert(isFramePointerUsed());
- assert(compiler->lvaDoneFrameLayout ==
- Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be finalized
+ assert(compiler->lvaDoneFrameLayout == Compiler::FINAL_FRAME_LAYOUT); // The frame size and offsets must be
+ // finalized
assert(compiler->compCalleeFPRegsSavedMask != (regMaskTP)-1); // The float registers to be preserved is finalized
// Even though lvaToInitialSPRelativeOffset() depends on compLclFrameSize,
diff --git a/src/jit/codegeninterface.h b/src/jit/codegeninterface.h
index 34d2a4fce9..c5d9ec6a69 100644
--- a/src/jit/codegeninterface.h
+++ b/src/jit/codegeninterface.h
@@ -176,6 +176,11 @@ public:
int genSPtoFPdelta();
int genTotalFrameSize();
+#ifdef _TARGET_ARM64_
+ virtual void SetSaveFpLrWithAllCalleeSavedRegisters(bool value) = 0;
+ virtual bool IsSaveFpLrWithAllCalleeSavedRegisters() = 0;
+#endif // _TARGET_ARM64_
+
regNumber genGetThisArgReg(GenTreeCall* call) const;
#ifdef _TARGET_XARCH_
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index bb2ce7dedd..6123fb126a 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -49,6 +49,10 @@ AssemblyNamesList2* Compiler::s_pAltJitExcludeAssembliesList = nullpt
// static
bool Compiler::s_pJitDisasmIncludeAssembliesListInitialized = false;
AssemblyNamesList2* Compiler::s_pJitDisasmIncludeAssembliesList = nullptr;
+
+// static
+bool Compiler::s_pJitFunctionFileInitialized = false;
+MethodSet* Compiler::s_pJitMethodSet = nullptr;
#endif // DEBUG
/*****************************************************************************
@@ -3207,6 +3211,11 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
opts.compJitELTHookEnabled = false;
#endif // PROFILING_SUPPORTED
+#if defined(_TARGET_ARM64_)
+ // 0 is default: use the appropriate frame type based on the function.
+ opts.compJitSaveFpLrWithCalleeSavedRegisters = 0;
+#endif // defined(_TARGET_ARM64_)
+
#ifdef DEBUG
opts.dspInstrs = false;
opts.dspEmit = false;
@@ -3418,6 +3427,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
memset(compActiveStressModes, 0, sizeof(compActiveStressModes));
+ // Read function list, if not already read, and there exists such a list.
+ if (!s_pJitFunctionFileInitialized)
+ {
+ const wchar_t* functionFileName = JitConfig.JitFunctionFile();
+ if (functionFileName != nullptr)
+ {
+ s_pJitMethodSet =
+ new (HostAllocator::getHostAllocator()) MethodSet(functionFileName, HostAllocator::getHostAllocator());
+ }
+ s_pJitFunctionFileInitialized = true;
+ }
+
#endif // DEBUG
//-------------------------------------------------------------------------
@@ -3673,6 +3694,13 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
}
#endif // UNIX_AMD64_ABI
#endif
+
+#if defined(DEBUG) && defined(_TARGET_ARM64_)
+ if ((s_pJitMethodSet == nullptr) || s_pJitMethodSet->IsActiveMethod(info.compFullName, info.compMethodHash()))
+ {
+ opts.compJitSaveFpLrWithCalleeSavedRegisters = JitConfig.JitSaveFpLrWithCalleeSavedRegisters();
+ }
+#endif // defined(DEBUG) && defined(_TARGET_ARM64_)
}
#ifdef DEBUG
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 940893c0b7..2940bc9d77 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -8486,6 +8486,12 @@ public:
bool compTailCallLoopOpt;
#endif
+#if defined(_TARGET_ARM64_)
+ // Decision about whether to save FP/LR registers with callee-saved registers (see
+ // COMPlus_JitSaveFpLrWithCalleeSavedRegisters).
+ int compJitSaveFpLrWithCalleeSavedRegisters;
+#endif // defined(_TARGET_ARM64_)
+
#ifdef ARM_SOFTFP
static const bool compUseSoftFP = true;
#else // !ARM_SOFTFP
@@ -8503,6 +8509,9 @@ public:
#ifdef DEBUG
static bool s_pJitDisasmIncludeAssembliesListInitialized;
static AssemblyNamesList2* s_pJitDisasmIncludeAssembliesList;
+
+ static bool s_pJitFunctionFileInitialized;
+ static MethodSet* s_pJitMethodSet;
#endif // DEBUG
#ifdef DEBUG
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
index 328ea0c360..842d729111 100644
--- a/src/jit/jitconfigvalues.h
+++ b/src/jit/jitconfigvalues.h
@@ -371,6 +371,37 @@ CONFIG_INTEGER(JitGuardedDevirtualizationGuessUniqueInterface, W("JitGuardedDevi
CONFIG_INTEGER(JitGuardedDevirtualizationGuessBestClass, W("JitGuardedDevirtualizationGuessBestClass"), 1)
#endif // DEBUG
+#if defined(DEBUG)
+// JitFunctionFile: Name of a file that contains a list of functions. If the currently compiled function is in the
+// file, certain other JIT config variables will be active. If the currently compiled function is not in the file,
+// the specific JIT config variables will not be active.
+//
+// Functions are approximately in the format output by JitFunctionTrace, e.g.:
+//
+// System.CLRConfig:GetBoolValue(ref,byref):bool (MethodHash=3c54d35e)
+// -- use the MethodHash, not the function name
+//
+// System.CLRConfig:GetBoolValue(ref,byref):bool
+// -- use just the name
+//
+// Lines with leading ";" "#" or "//" are ignored.
+//
+// If this is unset, then the JIT config values have their normal behavior.
+//
+CONFIG_STRING(JitFunctionFile, W("JitFunctionFile"))
+#endif // DEBUG
+
+#if defined(DEBUG)
+#if defined(_TARGET_ARM64_)
+// JitSaveFpLrWithCalleeSavedRegisters:
+// 0: use default frame type decision
+// 1: disable frames that save FP/LR registers with the callee-saved registers (at the top of the frame)
+// 2: force all frames to use the frame types that save FP/LR registers with the callee-saved registers (at the top
+// of the frame)
+CONFIG_INTEGER(JitSaveFpLrWithCalleeSavedRegisters, W("JitSaveFpLrWithCalleeSavedRegisters"), 0)
+#endif // defined(_TARGET_ARM64_)
+#endif // DEBUG
+
#undef CONFIG_INTEGER
#undef CONFIG_STRING
#undef CONFIG_METHODSET
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index c9029443b8..473ade919d 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -4319,28 +4319,28 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
*
* The frame is laid out as follows for x86:
*
- * ESP frames
+ * ESP frames
*
- * | |
- * |-----------------------|
- * | incoming |
- * | arguments |
- * |-----------------------| <---- Virtual '0'
- * | return address |
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------| <---- Virtual '0'
+ * | return address |
* +=======================+
- * |Callee saved registers |
- * |-----------------------|
- * | Temps |
- * |-----------------------|
- * | Variables |
+ * |Callee saved registers |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Variables |
* |-----------------------| <---- Ambient ESP
- * | Arguments for the |
- * ~ next function ~
- * | |
- * | | |
- * | | Stack grows |
- * | downward
- * V
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * | | |
+ * | | Stack grows |
+ * | downward
+ * V
*
*
* EBP frames
@@ -4349,13 +4349,13 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* |-----------------------|
* | incoming |
* | arguments |
- * |-----------------------| <---- Virtual '0'
- * | return address |
+ * |-----------------------| <---- Virtual '0'
+ * | return address |
* +=======================+
* | incoming EBP |
* |-----------------------| <---- EBP
- * |Callee saved registers |
- * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
* | security object |
* |-----------------------|
* | ParamTypeArg |
@@ -4385,39 +4385,39 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
*
* The frame is laid out as follows for x64:
*
- * RSP frames
- * | |
- * |-----------------------|
- * | incoming |
- * | arguments |
- * |-----------------------|
- * | 4 fixed incoming |
- * | argument slots |
+ * RSP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * |-----------------------|
+ * | 4 fixed incoming |
+ * | argument slots |
* |-----------------------| <---- Caller's SP & Virtual '0'
- * | return address |
+ * | return address |
* +=======================+
- * | Callee saved Int regs |
+ * | Callee saved Int regs |
* -------------------------
* | Padding | <---- this padding (0 or 8 bytes) is to ensure flt registers are saved at a mem location aligned at 16-bytes
* | | so that we can save 128-bit callee saved xmm regs using performant "movaps" instruction instead of "movups"
* -------------------------
* | Callee saved Flt regs | <----- entire 128-bits of callee saved xmm registers are stored here
- * |-----------------------|
- * | Temps |
- * |-----------------------|
- * | Variables |
* |-----------------------|
- * | Arguments for the |
- * ~ next function ~
- * | |
- * |-----------------------|
- * | 4 fixed outgoing |
- * | argument slots |
+ * | Temps |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------|
+ * | 4 fixed outgoing |
+ * | argument slots |
* |-----------------------| <---- Ambient RSP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
- * V
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
*
*
* RBP frames
@@ -4425,30 +4425,30 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* |-----------------------|
* | incoming |
* | arguments |
- * |-----------------------|
- * | 4 fixed incoming |
- * | argument slots |
+ * |-----------------------|
+ * | 4 fixed incoming |
+ * | argument slots |
* |-----------------------| <---- Caller's SP & Virtual '0'
- * | return address |
+ * | return address |
* +=======================+
- * | Callee saved Int regs |
+ * | Callee saved Int regs |
* -------------------------
- * | Padding |
+ * | Padding |
* -------------------------
- * | Callee saved Flt regs |
- * |-----------------------|
+ * | Callee saved Flt regs |
+ * |-----------------------|
* | security object |
* |-----------------------|
* | ParamTypeArg |
* |-----------------------|
* | |
- * | |
+ * | |
* ~ Variables ~
- * | |
+ * | |
* | |
* |-----------------------|
* | Temps |
- * |-----------------------|
+ * |-----------------------|
* | |
* ~ localloc ~ // not in frames with EH
* | |
@@ -4456,31 +4456,31 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* | PSPSym | // only in frames with EH (thus no localloc)
* | |
* |-----------------------| <---- RBP in localloc frames (max 240 bytes from Initial-SP)
- * | Arguments for the |
- * ~ next function ~
- * | |
- * |-----------------------|
- * | 4 fixed outgoing |
- * | argument slots |
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
+ * |-----------------------|
+ * | 4 fixed outgoing |
+ * | argument slots |
* |-----------------------| <---- Ambient RSP (before localloc, this is Initial-SP)
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
*
* The frame is laid out as follows for ARM (this is a general picture; details may differ for different conditions):
*
- * SP frames
- * | |
- * |-----------------------|
- * | incoming |
- * | arguments |
+ * SP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
* +=======================+ <---- Caller's SP
- * | Pre-spill registers |
+ * | Pre-spill registers |
* |-----------------------| <---- Virtual '0'
- * |Callee saved registers |
- * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
* ~ possible double align ~
* |-----------------------|
* | security object |
@@ -4501,13 +4501,13 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* |-----------------------|
* ~ possible double align ~
* |-----------------------|
- * | Arguments for the |
- * ~ next function ~
- * | |
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
* |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
*
@@ -4517,10 +4517,10 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* | incoming |
* | arguments |
* +=======================+ <---- Caller's SP
- * | Pre-spill registers |
+ * | Pre-spill registers |
* |-----------------------| <---- Virtual '0'
- * |Callee saved registers |
- * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
* | PSPSym | // Only for frames with EH, which means FP-based frames
* |-----------------------|
* ~ possible double align ~
@@ -4545,13 +4545,13 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* |-----------------------|
* | localloc |
* |-----------------------|
- * | Arguments for the |
- * ~ next function ~
- * | |
+ * | Arguments for the |
+ * ~ next function ~
+ * | |
* |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
*
@@ -4560,17 +4560,17 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* NOTE: SP must be 16-byte aligned, so there may be alignment slots in the frame.
* We will often save and establish a frame pointer to create better ETW stack walks.
*
- * SP frames
- * | |
- * |-----------------------|
- * | incoming |
- * | arguments |
+ * SP frames
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
* +=======================+ <---- Caller's SP
* | homed | // this is only needed if reg argument need to be homed, e.g., for varargs
- * | register arguments |
+ * | register arguments |
* |-----------------------| <---- Virtual '0'
* |Callee saved registers |
- * | except fp/lr |
+ * | except fp/lr |
* |-----------------------|
* | security object |
* |-----------------------|
@@ -4591,13 +4591,13 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* | Saved LR |
* |-----------------------|
* | Saved FP | <---- Frame pointer
- * |-----------------------|
+ * |-----------------------|
* | Stack arguments for |
* | the next function |
* |-----------------------| <---- SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
*
@@ -4608,10 +4608,10 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* | arguments |
* +=======================+ <---- Caller's SP
* | optional homed | // this is only needed if reg argument need to be homed, e.g., for varargs
- * | register arguments |
- * |-----------------------| <---- Virtual '0'
+ * | register arguments |
+ * |-----------------------| <---- Virtual '0'
* |Callee saved registers |
- * | except fp/lr |
+ * | except fp/lr |
* |-----------------------|
* | PSPSym | // Only for frames with EH, which requires FP-based frames
* |-----------------------|
@@ -4640,9 +4640,53 @@ unsigned Compiler::lvaGetMaxSpillTempSize()
* | Stack arguments for |
* | the next function |
* |-----------------------| <---- Ambient SP
- * | | |
- * ~ | Stack grows ~
- * | | downward |
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
+ * V
+ *
+ *
+ * FP (R29 / x29) frames where FP/LR are stored at the top of the frame (frames requiring GS that have localloc)
+ * | |
+ * |-----------------------|
+ * | incoming |
+ * | arguments |
+ * +=======================+ <---- Caller's SP
+ * | optional homed | // this is only needed if reg argument need to be homed, e.g., for varargs
+ * | register arguments |
+ * |-----------------------| <---- Virtual '0'
+ * | Saved LR |
+ * |-----------------------|
+ * | Saved FP | <---- Frame pointer
+ * |-----------------------|
+ * |Callee saved registers |
+ * |-----------------------|
+ * | PSPSym | // Only for frames with EH, which requires FP-based frames
+ * |-----------------------|
+ * | security object |
+ * |-----------------------|
+ * | ParamTypeArg |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Variables |
+ * |-----------------------|
+ * | possible GS cookie |
+ * |-----------------------|
+ * | Temps |
+ * |-----------------------|
+ * | Stub Argument Var |
+ * |-----------------------|
+ * |Inlined PInvoke Frame V|
+ * |-----------------------|
+ * ~ localloc ~
+ * |-----------------------|
+ * | Stack arguments for |
+ * | the next function |
+ * |-----------------------| <---- Ambient SP
+ * | | |
+ * ~ | Stack grows ~
+ * | | downward |
* V
*
*
@@ -5576,6 +5620,30 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
codeGen->setFramePointerUsed(codeGen->isFramePointerRequired());
}
+#ifdef _TARGET_ARM64_
+ // Decide where to save FP and LR registers. We store FP/LR registers at the bottom of the frame if there is
+ // a frame pointer used (so we get positive offsets from the frame pointer to access locals), but not if we
+ // need a GS cookie AND localloc is used, since we need the GS cookie to protect the saved return address,
+ // and also the saved frame pointer. See CodeGen::genPushCalleeSavedRegisters() for more details about the
+ // frame types. Since saving FP/LR at high addresses is a relatively rare case, force using it during stress.
+ // (It should be legal to use these frame types for every frame).
+
+ if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 0)
+ {
+ // Default configuration
+ codeGen->SetSaveFpLrWithAllCalleeSavedRegisters((getNeedsGSSecurityCookie() && compLocallocUsed) ||
+ compStressCompile(STRESS_GENERIC_VARN, 20));
+ }
+ else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 1)
+ {
+ codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(false); // Disable using new frames
+ }
+ else if (opts.compJitSaveFpLrWithCalleeSavedRegisters == 2)
+ {
+ codeGen->SetSaveFpLrWithAllCalleeSavedRegisters(true); // Force using new frames
+ }
+#endif // _TARGET_ARM64_
+
#ifdef _TARGET_XARCH_
// On x86/amd64, the return address has already been pushed by the call instruction in the caller.
stkOffs -= TARGET_POINTER_SIZE; // return address;
@@ -5618,15 +5686,16 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
stkOffs -= initialStkOffs;
}
- if (isFramePointerUsed())
+ if (codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() ||
+ !isFramePointerUsed()) // Note that currently we always have a frame pointer
{
- // Subtract off FP and LR.
- assert(compCalleeRegsPushed >= 2);
- stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
+ stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
}
else
{
- stkOffs -= compCalleeRegsPushed * REGSIZE_BYTES;
+ // Subtract off FP and LR.
+ assert(compCalleeRegsPushed >= 2);
+ stkOffs -= (compCalleeRegsPushed - 2) * REGSIZE_BYTES;
}
#else // !_TARGET_ARM64_
@@ -6207,7 +6276,8 @@ void Compiler::lvaAssignVirtualFrameOffsetsToLocals()
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_AMD64_)
#ifdef _TARGET_ARM64_
- if (isFramePointerUsed())
+ if (!codeGen->IsSaveFpLrWithAllCalleeSavedRegisters() &&
+ isFramePointerUsed()) // Note that currently we always have a frame pointer
{
// Create space for saving FP and LR.
stkOffs -= 2 * REGSIZE_BYTES;
diff --git a/src/jit/utils.cpp b/src/jit/utils.cpp
index ae7dd60ad4..2010678242 100644
--- a/src/jit/utils.cpp
+++ b/src/jit/utils.cpp
@@ -1557,6 +1557,194 @@ bool AssemblyNamesList2::IsInList(const char* assemblyName)
return false;
}
+//=============================================================================
+// MethodSet
+//=============================================================================
+
+//------------------------------------------------------------------------
+// MethodSet::MethodSet: Build the set by reading a list of methods from a text file.
+//
+// Arguments:
+//    filename - name of the file to read, one method per line
+//    alloc    - allocator stored in m_alloc and used to create the list nodes
+//
+// Notes:
+//    Each line is one of:
+//
+//       <method-name>
+//       <method-name><whitespace>(MethodHash=<hash>)
+//
+//    where <hash> is parsed as hexadecimal. Lines that start with ";", "#" or "//"
+//    are ignored, and so are empty lines. Entries are kept in file order in a singly
+//    linked list. If the file cannot be opened, the set is left empty.
+//    Lines are read into a 1024-byte buffer; longer lines are not handled specially.
+//
+MethodSet::MethodSet(const wchar_t* filename, HostAllocator alloc) : m_pInfos(nullptr), m_alloc(alloc)
+{
+    FILE* methodSetFile = _wfopen(filename, W("r"));
+    if (methodSetFile == nullptr)
+    {
+        return;
+    }
+
+    MethodInfo* lastInfo = m_pInfos;
+    char        buffer[1024];
+
+    while (true)
+    {
+        // Get next line
+        if (fgets(buffer, sizeof(buffer), methodSetFile) == nullptr)
+        {
+            break;
+        }
+
+        // Ignore lines starting with leading ";" "#" "//".
+        if ((0 == _strnicmp(buffer, ";", 1)) || (0 == _strnicmp(buffer, "#", 1)) || (0 == _strnicmp(buffer, "//", 2)))
+        {
+            continue;
+        }
+
+        // Remove trailing newline, if any.
+        char* p = strpbrk(buffer, "\r\n");
+        if (p != nullptr)
+        {
+            *p = '\0';
+        }
+
+        // Skip blank lines: they would otherwise be stored as an entry with an empty method name.
+        if (buffer[0] == '\0')
+        {
+            continue;
+        }
+
+        char*    methodName;
+        unsigned methodHash = 0;
+
+        // Parse the line. Very simple. One of:
+        //
+        //    <method-name>
+        //    <method-name><whitespace>(MethodHash=<hash>)
+
+        const char methodHashPattern[] = " (MethodHash=";
+        p                              = strstr(buffer, methodHashPattern);
+        if (p == nullptr)
+        {
+            // Just use it without the hash.
+            methodName = _strdup(buffer);
+        }
+        else
+        {
+            // There's a method hash; use that.
+
+            // First, get the method name.
+            char* p2 = p;
+            *p       = '\0';
+
+            // Null terminate method at first whitespace. (Don't have any leading whitespace!)
+            p = strpbrk(buffer, " \t");
+            if (p != nullptr)
+            {
+                *p = '\0';
+            }
+            methodName = _strdup(buffer);
+
+            // Now get the method hash.
+            p2 += strlen(methodHashPattern);
+            char* p3 = strchr(p2, ')');
+            if (p3 == nullptr)
+            {
+                // Malformed line: no closing parenthesis.
+                JITDUMP("Couldn't parse: %s\n", p2);
+                // We can still just use the method name.
+            }
+            else
+            {
+                // Convert the closing parenthesis to null.
+                *p3 = '\0';
+
+                // Now parse it as hex.
+                int count = sscanf_s(p2, "%x", &methodHash);
+                if (count != 1)
+                {
+                    JITDUMP("Couldn't parse: %s\n", p2);
+                    // Still, use the method name.
+                }
+            }
+        }
+
+        // Append to the tail so the list preserves file order.
+        MethodInfo* newInfo = new (m_alloc) MethodInfo(methodName, methodHash);
+        if (m_pInfos == nullptr)
+        {
+            m_pInfos = lastInfo = newInfo;
+        }
+        else
+        {
+            lastInfo->m_next = newInfo;
+            lastInfo         = newInfo;
+        }
+    }
+
+    // Everything has been read; close the file so the handle is not leaked.
+    fclose(methodSetFile);
+
+    if (m_pInfos == nullptr)
+    {
+        JITDUMP("No methods read from %ws\n", filename);
+    }
+    else
+    {
+        JITDUMP("Methods read from %ws:\n", filename);
+
+        int methodCount = 0;
+        for (MethodInfo* pInfo = m_pInfos; pInfo != nullptr; pInfo = pInfo->m_next)
+        {
+            JITDUMP(" %s (MethodHash: %x)\n", pInfo->m_MethodName, pInfo->m_MethodHash);
+            ++methodCount;
+        }
+
+        if (methodCount > 100)
+        {
+            JITDUMP("Warning: high method count (%d) for MethodSet with linear search lookups might be slow\n",
+                    methodCount);
+        }
+    }
+}
+
+//------------------------------------------------------------------------
+// MethodSet::~MethodSet: Release every list node together with the method name it holds.
+//
+// Notes:
+//    Both the name string and the node are handed to m_alloc.deallocate().
+//    NOTE(review): the constructor creates the name strings with _strdup(); confirm that
+//    releasing them through m_alloc is valid for the host allocator in use.
+//
+MethodSet::~MethodSet()
+{
+    MethodInfo* node = m_pInfos;
+    while (node != nullptr)
+    {
+        // Advance before freeing: 'doomed' must not be touched once it has been deallocated.
+        MethodInfo* doomed = node;
+        node               = node->m_next;
+
+        m_alloc.deallocate(doomed->m_MethodName);
+        m_alloc.deallocate(doomed);
+    }
+}
+
+// TODO: make this more like JitConfigValues::MethodSet::contains()?
+//------------------------------------------------------------------------
+// MethodSet::IsInSet: Check whether a method name is stored in the set.
+//
+// Arguments:
+//    methodName - name to look for
+//
+// Return Value:
+//    true if some entry's name equals 'methodName' under _stricmp (case-insensitive);
+//    false otherwise, including when the set is empty.
+//
+bool MethodSet::IsInSet(const char* methodName)
+{
+    // Walk the list until a matching entry is found or the end is reached.
+    MethodInfo* match = m_pInfos;
+    while ((match != nullptr) && (_stricmp(match->m_MethodName, methodName) != 0))
+    {
+        match = match->m_next;
+    }
+
+    return match != nullptr;
+}
+
+//------------------------------------------------------------------------
+// MethodSet::IsInSet: Check whether a method hash is stored in the set.
+//
+// Arguments:
+//    methodHash - hash value to look for
+//
+// Return Value:
+//    true if some entry's stored hash equals 'methodHash'; false otherwise,
+//    including when the set is empty.
+//
+bool MethodSet::IsInSet(int methodHash)
+{
+    // Walk the list until a matching entry is found or the end is reached.
+    MethodInfo* match = m_pInfos;
+    while ((match != nullptr) && (match->m_MethodHash != methodHash))
+    {
+        match = match->m_next;
+    }
+
+    return match != nullptr;
+}
+
+//------------------------------------------------------------------------
+// MethodSet::IsActiveMethod: Decide whether a method is selected by this set.
+//
+// Arguments:
+//    methodName - name of the method; must not be null
+//    methodHash - hash of the method; zero means "no hash, match by name only"
+//
+// Return Value:
+//    true if a non-zero 'methodHash' is found in the set, or failing that, if
+//    'methodName' is found in the set; false otherwise.
+//
+bool MethodSet::IsActiveMethod(const char* methodName, int methodHash)
+{
+    // A non-zero hash is tried first; the hash lookup is not attempted for a zero hash.
+    const bool matchedByHash = (methodHash != 0) && IsInSet(methodHash);
+    if (matchedByHash)
+    {
+        JITDUMP("Method active in MethodSet (hash match): %s Hash: %x\n", methodName, methodHash);
+        return true;
+    }
+
+    // No hash match: fall back to looking the method up by name.
+    assert(methodName != nullptr);
+    if (!IsInSet(methodName))
+    {
+        return false;
+    }
+
+    JITDUMP("Method active in MethodSet (name match): %s Hash: %x\n", methodName, methodHash);
+    return true;
+}
+
#ifdef FEATURE_JIT_METHOD_PERF
CycleCount::CycleCount() : cps(CycleTimer::CyclesPerSecond())
{
diff --git a/src/jit/utils.h b/src/jit/utils.h
index ec3b0e3e32..1bc3daf8b3 100644
--- a/src/jit/utils.h
+++ b/src/jit/utils.h
@@ -550,6 +550,60 @@ public:
}
};
+// MethodSet: Manage a list of methods that is read from a file.
+//
+// Methods are approximately in the format output by JitFunctionTrace, e.g.:
+//
+//     System.CLRConfig:GetBoolValue(ref,byref):bool (MethodHash=3c54d35e)
+//         -- use the MethodHash, not the method name
+//
+//     System.CLRConfig:GetBoolValue(ref,byref):bool
+//         -- use just the name
+//
+// Method names should not have any leading whitespace.
+//
+// The set is kept as a singly linked list and every lookup is a linear scan.
+// The destructor releases all list nodes, so the class is non-copyable.
+//
+// TODO: Should this be more related to JitConfigValues::MethodSet?
+//
+class MethodSet
+{
+    // One entry of the list: a method name and/or method hash read from the file.
+    // TODO: use a hash table? or two: one on hash value, one on function name
+    struct MethodInfo
+    {
+        char*       m_MethodName; // Method name string; released by ~MethodSet
+        int         m_MethodHash; // Method hash value
+        MethodInfo* m_next;       // Next entry in the list, or nullptr at the end
+
+        MethodInfo(char* methodName, int methodHash)
+            : m_MethodName(methodName), m_MethodHash(methodHash), m_next(nullptr)
+        {
+        }
+    };
+
+    MethodInfo*   m_pInfos; // Head of the list of method info; nullptr when the set is empty
+    HostAllocator m_alloc;  // HostAllocator to use in this class
+
+public:
+    // Take a Unicode string with the filename containing a list of method names, parse it, and store it.
+    MethodSet(const wchar_t* filename, HostAllocator alloc);
+
+    // Release every list entry and its method name string through m_alloc.
+    ~MethodSet();
+
+    // Not copyable: the destructor frees the list nodes, so a member-wise copy would
+    // leave two objects owning (and eventually freeing) the same nodes.
+    MethodSet(const MethodSet&) = delete;
+    MethodSet& operator=(const MethodSet&) = delete;
+
+    // Return 'true' if 'methodName' (in UTF-8 format) is in the stored set of methods.
+    // The comparison ignores case.
+    bool IsInSet(const char* methodName);
+
+    // Return 'true' if 'methodHash' is in the stored set of methods.
+    bool IsInSet(int methodHash);
+
+    // Return 'true' if this method is active. A non-zero methodHash is checked first; if it is zero
+    // or not in the set, the (non-null) methodName is checked.
+    bool IsActiveMethod(const char* methodName, int methodHash);
+
+    // Return 'true' if the method set is empty.
+    bool IsEmpty()
+    {
+        return m_pInfos == nullptr;
+    }
+};
+
#ifdef FEATURE_JIT_METHOD_PERF
// When Start() is called time is noted and when ElapsedTime
// is called we know how much time was spent in msecs.