diff options
author | Bruce Forstall <brucefo@microsoft.com> | 2018-07-27 09:25:54 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-07-27 09:25:54 -0700 |
commit | 6e86a0e487fbafb582628b07ca6613798111968c (patch) | |
tree | abffc40405a9c6dc1f04710cdf89a4af8f7fe865 /src/jit | |
parent | 2b91f03ecd2834768f960d094cdc2445aa879073 (diff) | |
parent | 7cd8f70d30963df4aa85203ba5f39f41285b2cd3 (diff) | |
download | coreclr-6e86a0e487fbafb582628b07ca6613798111968c.tar.gz coreclr-6e86a0e487fbafb582628b07ca6613798111968c.tar.bz2 coreclr-6e86a0e487fbafb582628b07ca6613798111968c.zip |
Merge pull request #19154 from BruceForstall/FixArm64Localloc
Fix overallocation of arm64 small constant localloc
Diffstat (limited to 'src/jit')
-rw-r--r-- | src/jit/codegenarm.cpp | 13 |
-rw-r--r-- | src/jit/codegenarm64.cpp | 16 |
-rw-r--r-- | src/jit/codegenxarch.cpp | 6 |
-rw-r--r-- | src/jit/lsraarm.cpp | 8 |
-rw-r--r-- | src/jit/lsraarm64.cpp | 8 |
-rw-r--r-- | src/jit/target.h | 11 |
6 files changed, 32 insertions, 30 deletions
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp index 67a609ca90..85d3e370cd 100644 --- a/src/jit/codegenarm.cpp +++ b/src/jit/codegenarm.cpp @@ -360,16 +360,19 @@ void CodeGen::genLclHeap(GenTree* tree) size_t amount = size->gtIntCon.gtIconVal; amount = AlignUp(amount, STACK_ALIGN); - // For small allocations we will generate up to four stp instructions - size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT); - if (cntStackAlignedWidthItems <= 4) + // For small allocations we will generate up to four push instructions (either 2 or 4, exactly, + // since STACK_ALIGN is 8, and REGSIZE_BYTES is 4). + static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); + assert(amount % REGSIZE_BYTES == 0); + size_t pushCount = amount / REGSIZE_BYTES; + if (pushCount <= 4) { instGen_Set_Reg_To_Zero(EA_PTRSIZE, regCnt); - while (cntStackAlignedWidthItems != 0) + while (pushCount != 0) { inst_IV(INS_push, (unsigned)genRegMask(regCnt)); - cntStackAlignedWidthItems -= 1; + pushCount -= 1; } goto ALLOC_DONE; diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 8bb04c27f2..70d3a8f01b 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -1887,7 +1887,7 @@ void CodeGen::genLclHeap(GenTree* tree) goto BAILOUT; } - // 'amount' is the total numbe of bytes to localloc to properly STACK_ALIGN + // 'amount' is the total number of bytes to localloc to properly STACK_ALIGN amount = AlignUp(amount, STACK_ALIGN); } else @@ -1965,16 +1965,18 @@ void CodeGen::genLclHeap(GenTree* tree) // We should reach here only for non-zero, constant size allocations. assert(amount > 0); - // For small allocations we will generate up to four stp instructions - size_t cntStackAlignedWidthItems = (amount >> STACK_ALIGN_SHIFT); - if (cntStackAlignedWidthItems <= 4) + // For small allocations we will generate up to four stp instructions, to zero 16 to 64 bytes. 
+ static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); + assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time + size_t stpCount = amount / (REGSIZE_BYTES * 2); + if (stpCount <= 4) { - while (cntStackAlignedWidthItems != 0) + while (stpCount != 0) { // We can use pre-indexed addressing. - // stp ZR, ZR, [SP, #-16]! + // stp ZR, ZR, [SP, #-16]! // STACK_ALIGN is 16 getEmitter()->emitIns_R_R_R_I(INS_stp, EA_PTRSIZE, REG_ZR, REG_ZR, REG_SPBASE, -16, INS_OPTS_PRE_INDEX); - cntStackAlignedWidthItems -= 1; + stpCount -= 1; } goto ALLOC_DONE; diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp index 1ceb928778..c28e60412a 100644 --- a/src/jit/codegenxarch.cpp +++ b/src/jit/codegenxarch.cpp @@ -2241,13 +2241,15 @@ void CodeGen::genLclHeap(GenTree* tree) if (compiler->info.compInitMem) { // Convert the count from a count of bytes to a loop count. We will loop once per - // stack alignment size, so each loop will zero 4 bytes on x86 and 16 bytes on x64. + // stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes + // on x64 and Linux/x86. + // // Note that we zero a single reg-size word per iteration on x86, and 2 reg-size // words per iteration on x64. We will shift off all the stack alignment bits // added above, so there is no need for an 'and' instruction. 
// --- shr regCnt, 2 (or 4) --- - inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT_ALL); + inst_RV_SH(INS_SHIFT_RIGHT_LOGICAL, EA_PTRSIZE, regCnt, STACK_ALIGN_SHIFT); } else { diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp index a5f4e98813..8d87794d1f 100644 --- a/src/jit/lsraarm.cpp +++ b/src/jit/lsraarm.cpp @@ -63,11 +63,11 @@ int LinearScan::BuildLclHeap(GenTree* tree) } else { - sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT); + sizeVal = AlignUp(sizeVal, STACK_ALIGN); + size_t pushCount = sizeVal / REGSIZE_BYTES; - // For small allocations up to 4 store instructions - if (cntStackAlignedWidthItems <= 4) + // For small allocations we use up to 4 push instructions + if (pushCount <= 4) { internalIntCount = 0; } diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp index 6fe9d06d4e..e5a6a87ba6 100644 --- a/src/jit/lsraarm64.cpp +++ b/src/jit/lsraarm64.cpp @@ -593,12 +593,12 @@ int LinearScan::BuildNode(GenTree* tree) // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size. // This should also help in debugging as we can examine the original size specified with // localloc. - sizeVal = AlignUp(sizeVal, STACK_ALIGN); - size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT); + sizeVal = AlignUp(sizeVal, STACK_ALIGN); + size_t stpCount = sizeVal / (REGSIZE_BYTES * 2); - // For small allocations upto 4 'stp' instructions (i.e. 64 bytes of localloc) + // For small allocations up to 4 'stp' instructions (i.e. 
16 to 64 bytes of localloc) // - if (cntStackAlignedWidthItems <= 4) + if (stpCount <= 4) { // Need no internal registers } diff --git a/src/jit/target.h b/src/jit/target.h index d3520d0220..6cdbe4bd32 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -340,12 +340,10 @@ typedef unsigned char regNumberSmall; #define CODE_ALIGN 1 // code alignment requirement #if !defined(UNIX_X86_ABI) #define STACK_ALIGN 4 // stack alignment requirement - #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs - #define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units + #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN) #else #define STACK_ALIGN 16 // stack alignment requirement - #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs - #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units + #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN) #endif // !UNIX_X86_ABI #define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI) @@ -602,8 +600,7 @@ typedef unsigned char regNumberSmall; #define CODE_ALIGN 1 // code alignment requirement #define STACK_ALIGN 16 // stack alignment requirement - #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in pointer sized words - #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units + #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert size in bytes to size in STACK_ALIGN units == log2(STACK_ALIGN) #if ETW_EBP_FRAMED #define RBM_ETW_FRAMED_EBP RBM_NONE @@ -958,7 +955,6 @@ typedef unsigned char regNumberSmall; #define CODE_ALIGN 2 // code alignment requirement #define STACK_ALIGN 8 // stack 
alignment requirement - #define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs #define RBM_INT_CALLEE_SAVED (RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10) #define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R12|RBM_LR) @@ -1266,7 +1262,6 @@ typedef unsigned char regNumberSmall; #define CODE_ALIGN 4 // code alignment requirement #define STACK_ALIGN 16 // stack alignment requirement - #define STACK_ALIGN_SHIFT 3 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs #define RBM_INT_CALLEE_SAVED (RBM_R19|RBM_R20|RBM_R21|RBM_R22|RBM_R23|RBM_R24|RBM_R25|RBM_R26|RBM_R27|RBM_R28) #define RBM_INT_CALLEE_TRASH (RBM_R0|RBM_R1|RBM_R2|RBM_R3|RBM_R4|RBM_R5|RBM_R6|RBM_R7|RBM_R8|RBM_R9|RBM_R10|RBM_R11|RBM_R12|RBM_R13|RBM_R14|RBM_R15|RBM_IP0|RBM_IP1|RBM_LR) |