summary | refs | log | tree | commit | diff
path: root/src/jit
diff options
context:
space:
mode:
author: Bruce Forstall <brucefo@microsoft.com> 2018-07-27 09:25:54 -0700
committer: GitHub <noreply@github.com> 2018-07-27 09:25:54 -0700
commit: 6e86a0e487fbafb582628b07ca6613798111968c (patch)
tree: abffc40405a9c6dc1f04710cdf89a4af8f7fe865 /src/jit
parent: 2b91f03ecd2834768f960d094cdc2445aa879073 (diff)
parent: 7cd8f70d30963df4aa85203ba5f39f41285b2cd3 (diff)
download: coreclr-6e86a0e487fbafb582628b07ca6613798111968c.tar.gz
          coreclr-6e86a0e487fbafb582628b07ca6613798111968c.tar.bz2
          coreclr-6e86a0e487fbafb582628b07ca6613798111968c.zip
Merge pull request #19154 from BruceForstall/FixArm64Localloc
Fix overallocation of arm64 small constant localloc
Diffstat (limited to 'src/jit')
-rw-r--r-- src/jit/codegenarm.cpp   13
-rw-r--r-- src/jit/codegenarm64.cpp 16
-rw-r--r-- src/jit/codegenxarch.cpp  6
-rw-r--r-- src/jit/lsraarm.cpp       8
-rw-r--r-- src/jit/lsraarm64.cpp     8
-rw-r--r-- src/jit/target.h         11
6 files changed, 32 insertions, 30 deletions
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index 67a609ca90..85d3e370cd 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -360,16 +360,19 @@ void CodeGen::genLclHeap(GenTree* tree)
size_t amount = size->gtIntCon.gtIconVal;
amount = AlignUp(amount, STACK_ALIGN);
- // For small allocations we will generate up to four stp instructions
- size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
- if (cntStackAlignedWidthItems <= 4)
+ // For small allocations we will generate up to four push instructions (either 2 or 4, exactly,
+ // since STACK_ALIGN is 8, and REGSIZE_BYTES is 4).
+ static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
+ assert(amount % REGSIZE_BYTES == 0);
+ size_t pushCount = amount / REGSIZE_BYTES;
+ if (pushCount <= 4)
{
instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt);
- while (cntStackAlignedWidthItems != 0)
+ while (pushCount != 0)
{
inst_IV(INS_push, (unsigned)genRegMask(regCnt));
- cntStackAlignedWidthItems -= 1;
+ pushCount -= 1;
}
goto ALLOC_DONE;
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 8bb04c27f2..70d3a8f01b 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -1887,7 +1887,7 @@ void CodeGen::genLclHeap(GenTree* tree)
goto BAILOUT;
}
- // 'amount' is the total numbe of bytes to localloc to properly STACK_ALIGN
+ // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN
amount = AlignUp(amount, STACK_ALIGN);
}
else
@@ -1965,16 +1965,18 @@ void CodeGen::genLclHeap(GenTree* tree)
// We should reach here only for non-zero, constant size allocations.
assert(amount > 0);
- // For small allocations we will generate up to four stp instructions
- size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT);
- if (cntStackAlignedWidthItems <= 4)
+ // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes.
+ static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
+ assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time
+ size_t stpCount = amount / (REGSIZE_BYTES * 2);
+ if (stpCount <= 4)
{
- while (cntStackAlignedWidthItems != 0)
+ while (stpCount != 0)
{
// We can use pre-indexed addressing.
- // stp ZR, ZR, [SP, #-16]!
+ // stp ZR, ZR, [SP, #-16]! // STACK_ALIGN is 16
getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX);
- cntStackAlignedWidthItems -= 1;
+ stpCount -= 1;
}
goto ALLOC_DONE;
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 1ceb928778..c28e60412a 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -2241,13 +2241,15 @@ void CodeGen::genLclHeap(GenTree* tree)
if (compiler->info.compInitMem)
{
// Convert the count from a count of bytes to a loop count. We will loop once per
- // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64.
+ // stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes
+ // on x64 and Linux/x86.
+ //
// Note that we zero a single reg-size word per iteration on x86, and 2 reg-size
// words per iteration on x64. We will shift off all the stack alignment bits
// added above, so there is no need for an 'and' instruction.
// --- shr regCnt, 2 (or 4) ---
- inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL);
+ inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT);
}
else
{
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
index a5f4e98813..8d87794d1f 100644
--- a/src/jit/lsraarm.cpp
+++ b/src/jit/lsraarm.cpp
@@ -63,11 +63,11 @@ int LinearScan::BuildLclHeap(GenTree* tree)
}
else
{
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
- size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t pushCount = sizeVal / REGSIZE_BYTES;
- // For small allocations up to 4 store instructions
- if (cntStackAlignedWidthItems <= 4)
+ // For small allocations we use up to 4 push instructions
+ if (pushCount <= 4)
{
internalIntCount = 0;
}
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
index 6fe9d06d4e..e5a6a87ba6 100644
--- a/src/jit/lsraarm64.cpp
+++ b/src/jit/lsraarm64.cpp
@@ -593,12 +593,12 @@ int LinearScan::BuildNode(GenTree* tree)
// Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
// This should also help in debugging as we can examine the original size specified with
// localloc.
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
- size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t stpCount = sizeVal / (REGSIZE_BYTES * 2);
- // For small allocations upto 4 'stp' instructions (i.e. 64 bytes of localloc)
+ // For small allocations up to 4 'stp' instructions (i.e. 16 to 64 bytes of localloc)
//
- if (cntStackAlignedWidthItems <= 4)
+ if (stpCount <= 4)
{
// Need no internal registers
}
diff --git a/src/jit/target.h b/src/jit/target.h
index d3520d0220..6cdbe4bd32 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -340,12 +340,10 @@ typedef unsigned char regNumberSmall;
#define CODE_ALIGN 1 // code alignment requirement
#if !defined(UNIX_X86_ABI)
#define STACK_ALIGN 4 // stack alignment requirement
- #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
- #define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+ #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN)
#else
#define STACK_ALIGN 16 // stack alignment requirement
- #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
- #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+ #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN)
#endif // !UNIX_X86_ABI
#define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI)
@@ -602,8 +600,7 @@ typedef unsigned char regNumberSmall;
#define CODE_ALIGN 1 // code alignment requirement
#define STACK_ALIGN 16 // stack alignment requirement
- #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in pointer sized words
- #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+ #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN)
#if ETW_EBP_FRAMED
#define RBM_ETW_FRAMED_EBP RBM_NONE
@@ -958,7 +955,6 @@ typedef unsigned char regNumberSmall;
#define CODE_ALIGN 2 // code alignment requirement
#define STACK_ALIGN 8 // stack alignment requirement
- #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
#define RBM_INT_CALLEE_SAVED (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10)
#define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R12|RBM_LR)
@@ -1266,7 +1262,6 @@ typedef unsigned char regNumberSmall;
#define CODE_ALIGN 4 // code alignment requirement
#define STACK_ALIGN 16 // stack alignment requirement
- #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
#define RBM_INT_CALLEE_SAVED (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28)
#define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_IP0|RBM_IP1|RBM_LR)