Diffstat (limited to 'src/jit')
-rw-r--r--  src/jit/CMakeLists.txt | 9
-rw-r--r--  src/jit/DIRS.proj | 4
-rw-r--r--  src/jit/block.cpp | 20
-rw-r--r--  src/jit/block.h | 161
-rwxr-xr-x  src/jit/codegen.h | 2
-rw-r--r--  src/jit/codegenarm.cpp | 1185
-rw-r--r--  src/jit/codegenarm64.cpp | 27
-rw-r--r--  src/jit/codegencommon.cpp | 151
-rw-r--r--  src/jit/codegenlegacy.cpp | 215
-rw-r--r--  src/jit/codegenlinear.cpp | 32
-rw-r--r--  src/jit/codegenlinear.h | 5
-rw-r--r--  src/jit/codegenxarch.cpp | 483
-rw-r--r--  src/jit/compiler.cpp | 83
-rw-r--r--  src/jit/compiler.h | 322
-rw-r--r--  src/jit/compiler.hpp | 68
-rw-r--r--  src/jit/compmemkind.h | 2
-rw-r--r--  src/jit/compphases.h | 6
-rw-r--r--  src/jit/decomposelongs.cpp | 165
-rw-r--r--  src/jit/decomposelongs.h | 2
-rw-r--r--  src/jit/ee_il_dll.cpp | 3
-rw-r--r--  src/jit/emit.cpp | 2
-rw-r--r--  src/jit/emit.h | 21
-rw-r--r--  src/jit/emitarm.cpp | 57
-rw-r--r--  src/jit/emitarm64.cpp | 1
-rw-r--r--  src/jit/emitxarch.cpp | 79
-rw-r--r--  src/jit/emitxarch.h | 40
-rw-r--r--  src/jit/flowgraph.cpp | 1797
-rw-r--r--  src/jit/gcencode.cpp | 11
-rw-r--r--  src/jit/gcinfo.cpp | 6
-rw-r--r--  src/jit/gentree.cpp | 315
-rw-r--r--  src/jit/gentree.h | 188
-rw-r--r--  src/jit/gschecks.cpp | 3
-rw-r--r--  src/jit/gtlist.h | 13
-rw-r--r--  src/jit/importer.cpp | 233
-rw-r--r--  src/jit/inline.def | 2
-rw-r--r--  src/jit/instr.cpp | 12
-rw-r--r--  src/jit/instrsxarch.h | 3
-rw-r--r--  src/jit/jit.h | 12
-rw-r--r--  src/jit/jit.settings.targets | 5
-rw-r--r--  src/jit/jitconfigvalues.h | 25
-rw-r--r--  src/jit/jiteh.cpp | 7
-rw-r--r--  src/jit/jiteh.h | 4
-rw-r--r--  src/jit/lclvars.cpp | 136
-rw-r--r--  src/jit/lir.cpp | 10
-rw-r--r--  src/jit/liveness.cpp | 699
-rw-r--r--  src/jit/lower.cpp | 724
-rw-r--r--  src/jit/lower.h | 23
-rw-r--r--  src/jit/lowerarm.cpp | 148
-rw-r--r--  src/jit/lowerarm64.cpp | 1690
-rw-r--r--  src/jit/lowerxarch.cpp | 3677
-rw-r--r--  src/jit/lsra.cpp | 34
-rw-r--r--  src/jit/lsraarm.cpp | 1073
-rw-r--r--  src/jit/lsraarm64.cpp | 1766
-rw-r--r--  src/jit/lsraxarch.cpp | 3684
-rw-r--r--  src/jit/morph.cpp | 567
-rw-r--r--  src/jit/optcse.cpp | 20
-rw-r--r--  src/jit/optimizer.cpp | 134
-rw-r--r--  src/jit/rationalize.cpp | 16
-rw-r--r--  src/jit/regalloc.cpp | 4
-rw-r--r--  src/jit/registerfp.cpp | 6
-rw-r--r--  src/jit/simd.cpp | 257
-rw-r--r--  src/jit/simdcodegenxarch.cpp | 161
-rw-r--r--  src/jit/simdintrinsiclist.h | 5
-rw-r--r--  src/jit/sm.cpp | 5
-rw-r--r--  src/jit/sm.h | 3
-rw-r--r--  src/jit/ssabuilder.cpp | 410
-rw-r--r--  src/jit/ssabuilder.h | 4
-rw-r--r--  src/jit/ssarenamestate.cpp | 16
-rw-r--r--  src/jit/ssarenamestate.h | 94
-rw-r--r--  src/jit/stackfp.cpp | 10
-rw-r--r--  src/jit/target.h | 20
-rw-r--r--  src/jit/unwind.cpp | 32
-rw-r--r--  src/jit/unwindx86.cpp | 249
-rw-r--r--  src/jit/valuenum.cpp | 804
-rw-r--r--  src/jit/valuenum.h | 4
-rw-r--r--  src/jit/valuenumfuncs.h | 10
-rw-r--r--  src/jit/valuenumtype.h | 4
77 files changed, 14076 insertions(+), 8204 deletions(-)
diff --git a/src/jit/CMakeLists.txt b/src/jit/CMakeLists.txt
index 96b8c496b9..db6e5973ba 100644
--- a/src/jit/CMakeLists.txt
+++ b/src/jit/CMakeLists.txt
@@ -81,6 +81,7 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64)
codegenxarch.cpp
emitxarch.cpp
lowerxarch.cpp
+ lsraxarch.cpp
simd.cpp
simdcodegenxarch.cpp
targetamd64.cpp
@@ -92,6 +93,7 @@ elseif(CLR_CMAKE_TARGET_ARCH_ARM)
decomposelongs.cpp
emitarm.cpp
lowerarm.cpp
+ lsraarm.cpp
targetarm.cpp
unwindarm.cpp
)
@@ -101,15 +103,18 @@ elseif(CLR_CMAKE_TARGET_ARCH_I386)
decomposelongs.cpp
emitxarch.cpp
lowerxarch.cpp
+ lsraxarch.cpp
simd.cpp
simdcodegenxarch.cpp
targetx86.cpp
+ unwindx86.cpp
)
elseif(CLR_CMAKE_TARGET_ARCH_ARM64)
set( ARCH_SOURCES
codegenarm64.cpp
emitarm64.cpp
lowerarm64.cpp
+ lsraarm64.cpp
targetarm64.cpp
unwindarm.cpp
unwindarm64.cpp
@@ -203,9 +208,9 @@ if (CLR_CMAKE_PLATFORM_ARCH_ARM)
add_subdirectory(protojit)
endif (CLR_CMAKE_PLATFORM_ARCH_ARM)
-if (CLR_CMAKE_PLATFORM_ARCH_I386)
+if (CLR_CMAKE_PLATFORM_ARCH_I386 AND WIN32)
add_subdirectory(legacyjit)
if (NOT CLR_BUILD_JIT32)
add_subdirectory(compatjit)
endif ()
-endif (CLR_CMAKE_PLATFORM_ARCH_I386)
+endif (CLR_CMAKE_PLATFORM_ARCH_I386 AND WIN32)
diff --git a/src/jit/DIRS.proj b/src/jit/DIRS.proj
index 6d1c06d3f0..eb00cc1d64 100644
--- a/src/jit/DIRS.proj
+++ b/src/jit/DIRS.proj
@@ -26,8 +26,8 @@
<ItemGroup Condition="'$(BuildExePhase)' == '1' and '$(BuildProjectName)' != 'CoreSys'">
<!-- Build the "FrankenJit" (RyuJIT front-end, legacy back-end) and "FrankenAltjit". These can't conflict with the names of the JIT32 directory outputs. -->
- <ProjectFile Condition="'$(BuildArchitecture)' == 'i386' or '$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" />
- <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'arm'" Include="frankenjit\frankenjit.nativeproj" />
+ <ProjectFile Condition="'$(BuildArchitecture)' == 'i386'" Include="frankenaltjit\frankenaltjit.nativeproj" />
<!-- This might be useful, to help make sure JIT devs build all configurations of the JIT (including crossgen), but
it appears to cause problems with the build system, and it slows down normal JIT developer productivity by adding a seldom-useful build.
diff --git a/src/jit/block.cpp b/src/jit/block.cpp
index 47f1052cc8..6d8bc348fd 100644
--- a/src/jit/block.cpp
+++ b/src/jit/block.cpp
@@ -365,6 +365,14 @@ void BasicBlock::dspFlags()
{
printf("KEEP ");
}
+ if (bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ printf("cfb ");
+ }
+ if (bbFlags & BBF_CLONED_FINALLY_END)
+ {
+ printf("cfe ");
+ }
}
/*****************************************************************************
@@ -564,10 +572,10 @@ void BasicBlock::dspBlockHeader(Compiler* compiler,
#endif // DEBUG
-// Allocation function for HeapPhiArg.
-void* BasicBlock::HeapPhiArg::operator new(size_t sz, Compiler* comp)
+// Allocation function for MemoryPhiArg.
+void* BasicBlock::MemoryPhiArg::operator new(size_t sz, Compiler* comp)
{
- return comp->compGetMem(sz, CMK_HeapPhiArg);
+ return comp->compGetMem(sz, CMK_MemoryPhiArg);
}
//------------------------------------------------------------------------
@@ -664,7 +672,7 @@ bool BasicBlock::IsLIR()
// Return Value:
// The first statement in the block's bbTreeList.
//
-GenTreeStmt* BasicBlock::firstStmt()
+GenTreeStmt* BasicBlock::firstStmt() const
{
if (bbTreeList == nullptr)
{
@@ -683,7 +691,7 @@ GenTreeStmt* BasicBlock::firstStmt()
// Return Value:
// The last statement in the block's bbTreeList.
//
-GenTreeStmt* BasicBlock::lastStmt()
+GenTreeStmt* BasicBlock::lastStmt() const
{
if (bbTreeList == nullptr)
{
@@ -765,7 +773,7 @@ BasicBlock* BasicBlock::GetUniqueSucc()
}
// Static vars.
-BasicBlock::HeapPhiArg* BasicBlock::EmptyHeapPhiDef = (BasicBlock::HeapPhiArg*)0x1;
+BasicBlock::MemoryPhiArg* BasicBlock::EmptyMemoryPhiDef = (BasicBlock::MemoryPhiArg*)0x1;
unsigned PtrKeyFuncs<BasicBlock>::GetHashCode(const BasicBlock* ptr)
{
diff --git a/src/jit/block.h b/src/jit/block.h
index 99c0efc1a7..786b83178f 100644
--- a/src/jit/block.h
+++ b/src/jit/block.h
@@ -144,6 +144,88 @@ struct EntryState
StackEntry* esStack; // ptr to stack
};
+// Enumeration of the kinds of memory whose state changes the compiler tracks
+enum MemoryKind
+{
+ ByrefExposed = 0, // Includes anything byrefs can read/write (everything in GcHeap, address-taken locals,
+ // unmanaged heap, callers' locals, etc.)
+ GcHeap, // Includes actual GC heap, and also static fields
+ MemoryKindCount, // Number of MemoryKinds
+};
+#ifdef DEBUG
+const char* const memoryKindNames[] = {"ByrefExposed", "GcHeap"};
+#endif // DEBUG
+
+// Bitmask describing a set of memory kinds (usable in bitfields)
+typedef unsigned int MemoryKindSet;
+
+// Bitmask for a MemoryKindSet containing just the specified MemoryKind
+inline MemoryKindSet memoryKindSet(MemoryKind memoryKind)
+{
+ return (1U << memoryKind);
+}
+
+// Bitmask for a MemoryKindSet containing the specified MemoryKinds
+template <typename... MemoryKinds>
+inline MemoryKindSet memoryKindSet(MemoryKind memoryKind, MemoryKinds... memoryKinds)
+{
+ return memoryKindSet(memoryKind) | memoryKindSet(memoryKinds...);
+}
+
+// Bitmask containing all the MemoryKinds
+const MemoryKindSet fullMemoryKindSet = (1 << MemoryKindCount) - 1;
+
+// Bitmask containing no MemoryKinds
+const MemoryKindSet emptyMemoryKindSet = 0;
+
+// Standard iterator class for iterating through MemoryKinds
+class MemoryKindIterator
+{
+ int value;
+
+public:
+ explicit inline MemoryKindIterator(int val) : value(val)
+ {
+ }
+ inline MemoryKindIterator& operator++()
+ {
+ ++value;
+ return *this;
+ }
+ inline MemoryKindIterator operator++(int)
+ {
+ return MemoryKindIterator(value++);
+ }
+ inline MemoryKind operator*()
+ {
+ return static_cast<MemoryKind>(value);
+ }
+ friend bool operator==(const MemoryKindIterator& left, const MemoryKindIterator& right)
+ {
+ return left.value == right.value;
+ }
+ friend bool operator!=(const MemoryKindIterator& left, const MemoryKindIterator& right)
+ {
+ return left.value != right.value;
+ }
+};
+
+// Empty struct that allows enumerating memory kinds via `for(MemoryKind kind : allMemoryKinds())`
+struct allMemoryKinds
+{
+ inline allMemoryKinds()
+ {
+ }
+ inline MemoryKindIterator begin()
+ {
+ return MemoryKindIterator(0);
+ }
+ inline MemoryKindIterator end()
+ {
+ return MemoryKindIterator(MemoryKindCount);
+ }
+};
+
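
As a rough illustration (not part of the diff), the MemoryKind helpers above compose like this when inspecting a block's memory state:

    // Sketch only: assumes a BasicBlock* with the bbMemoryDef bitfield
    // introduced later in this change; memoryKindNames is DEBUG-only.
    void dumpMemoryDefs(BasicBlock* block)
    {
        for (MemoryKind memoryKind : allMemoryKinds())
        {
            if ((block->bbMemoryDef & memoryKindSet(memoryKind)) != 0)
            {
                printf("%s def ", memoryKindNames[memoryKind]);
            }
        }
        // The variadic overload builds a mask covering several kinds at once.
        assert(memoryKindSet(ByrefExposed, GcHeap) == fullMemoryKindSet);
    }
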
// This encapsulates the "exception handling" successors of a block. That is,
// if a basic block BB1 occurs in a try block, we consider the first basic block
// BB2 of the corresponding handler to be an "EH successor" of BB1. Because we
@@ -353,15 +435,18 @@ struct BasicBlock : private LIR::Range
// BBJ_CALLFINALLY block, as well as, on x86, the final step block out of a
// finally.
+#define BBF_CLONED_FINALLY_BEGIN 0x100000000 // First block of a cloned finally region
+#define BBF_CLONED_FINALLY_END 0x200000000 // Last block of a cloned finally region
+
// Flags that relate blocks to loop structure.
#define BBF_LOOP_FLAGS (BBF_LOOP_PREHEADER | BBF_LOOP_HEAD | BBF_LOOP_CALL0 | BBF_LOOP_CALL1)
- bool isRunRarely()
+ bool isRunRarely() const
{
return ((bbFlags & BBF_RUN_RARELY) != 0);
}
- bool isLoopHead()
+ bool isLoopHead() const
{
return ((bbFlags & BBF_LOOP_HEAD) != 0);
}
@@ -388,7 +473,7 @@ struct BasicBlock : private LIR::Range
// For example, the top block might or might not have BBF_GC_SAFE_POINT,
// but we assume it does not have BBF_GC_SAFE_POINT any more.
-#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS)
+#define BBF_SPLIT_LOST (BBF_GC_SAFE_POINT | BBF_HAS_JMP | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END)
// Flags gained by the bottom block when a block is split.
// Note, this is a conservative guess.
@@ -399,7 +484,7 @@ struct BasicBlock : private LIR::Range
#define BBF_SPLIT_GAINED \
(BBF_DONT_REMOVE | BBF_HAS_LABEL | BBF_HAS_JMP | BBF_BACKWARD_JUMP | BBF_HAS_IDX_LEN | BBF_HAS_NEWARRAY | \
- BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS)
+ BBF_PROF_WEIGHT | BBF_HAS_NEWOBJ | BBF_KEEP_BBJ_ALWAYS | BBF_CLONED_FINALLY_END)
#ifndef __GNUC__ // GCC doesn't like C_ASSERT at global scope
static_assert_no_msg((BBF_SPLIT_NONEXIST & BBF_SPLIT_LOST) == 0);
@@ -801,64 +886,46 @@ struct BasicBlock : private LIR::Range
VARSET_TP bbVarUse; // variables used by block (before an assignment)
VARSET_TP bbVarDef; // variables assigned by block (before a use)
- VARSET_TP bbVarTmp; // TEMP: only used by FP enregistering code!
VARSET_TP bbLiveIn; // variables live on entry
VARSET_TP bbLiveOut; // variables live on exit
- // Use, def, live in/out information for the implicit "Heap" variable.
- unsigned bbHeapUse : 1;
- unsigned bbHeapDef : 1;
- unsigned bbHeapLiveIn : 1;
- unsigned bbHeapLiveOut : 1;
- unsigned bbHeapHavoc : 1; // If true, at some point the block does an operation that leaves the heap
- // in an unknown state. (E.g., unanalyzed call, store through unknown
- // pointer...)
+ // Use, def, live in/out information for the implicit memory variable.
+ MemoryKindSet bbMemoryUse : MemoryKindCount; // must be set for any MemoryKinds this block references
+ MemoryKindSet bbMemoryDef : MemoryKindCount; // must be set for any MemoryKinds this block mutates
+ MemoryKindSet bbMemoryLiveIn : MemoryKindCount;
+ MemoryKindSet bbMemoryLiveOut : MemoryKindCount;
+ MemoryKindSet bbMemoryHavoc : MemoryKindCount; // If true, at some point the block does an operation
+ // that leaves memory in an unknown state. (E.g.,
+ // unanalyzed call, store through unknown pointer...)
- // We want to make phi functions for the special implicit var "Heap". But since this is not a real
+ // We want to make phi functions for the special implicit var memory. But since this is not a real
// lclVar, and thus has no local #, we can't use a GenTreePhiArg. Instead, we use this struct.
- struct HeapPhiArg
+ struct MemoryPhiArg
{
- bool m_isSsaNum; // If true, the phi arg is an SSA # for an internal try block heap state, being
- // added to the phi of a catch block. If false, it's a pred block.
- union {
- BasicBlock* m_predBB; // Predecessor block from which the SSA # flows.
- unsigned m_ssaNum; // SSA# for internal block heap state.
- };
- HeapPhiArg* m_nextArg; // Next arg in the list, else NULL.
+ unsigned m_ssaNum; // SSA# for incoming value.
+ MemoryPhiArg* m_nextArg; // Next arg in the list, else NULL.
unsigned GetSsaNum()
{
- if (m_isSsaNum)
- {
- return m_ssaNum;
- }
- else
- {
- assert(m_predBB != nullptr);
- return m_predBB->bbHeapSsaNumOut;
- }
+ return m_ssaNum;
}
- HeapPhiArg(BasicBlock* predBB, HeapPhiArg* nextArg = nullptr)
- : m_isSsaNum(false), m_predBB(predBB), m_nextArg(nextArg)
- {
- }
- HeapPhiArg(unsigned ssaNum, HeapPhiArg* nextArg = nullptr)
- : m_isSsaNum(true), m_ssaNum(ssaNum), m_nextArg(nextArg)
+ MemoryPhiArg(unsigned ssaNum, MemoryPhiArg* nextArg = nullptr) : m_ssaNum(ssaNum), m_nextArg(nextArg)
{
}
void* operator new(size_t sz, class Compiler* comp);
};
- static HeapPhiArg* EmptyHeapPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled in Phi arg list
- // for Heap.
- HeapPhiArg* bbHeapSsaPhiFunc; // If the "in" Heap SSA var is not a phi definition, this value is NULL.
- // Otherwise, it is either the special value EmptyHeapPhiDefn, to indicate
- // that Heap needs a phi definition on entry, or else it is the linked list
- // of the phi arguments.
- unsigned bbHeapSsaNumIn; // The SSA # of "Heap" on entry to the block.
- unsigned bbHeapSsaNumOut; // The SSA # of "Heap" on exit from the block.
+ static MemoryPhiArg* EmptyMemoryPhiDef; // Special value (0x1, FWIW) to represent a to-be-filled-in Phi arg list
+ // for memory.
+ MemoryPhiArg* bbMemorySsaPhiFunc[MemoryKindCount]; // If the "in" memory SSA var is not a phi definition, this value
+ // is NULL.
+ // Otherwise, it is either the special value EmptyMemoryPhiDef, to indicate
+ // that memory needs a phi definition on entry, or else it is the linked list
+ // of the phi arguments.
+ unsigned bbMemorySsaNumIn[MemoryKindCount]; // The SSA # of memory on entry to the block.
+ unsigned bbMemorySsaNumOut[MemoryKindCount]; // The SSA # of memory on exit from the block.
VARSET_TP bbScope; // variables in scope over the block
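
A small sketch (illustrative, not from the diff) of how the simplified MemoryPhiArg list above is meant to be walked once its arguments have been filled in:

    // Walk the per-kind memory phi on entry to `block`, if one exists.
    void printMemoryPhiArgs(BasicBlock* block, MemoryKind memoryKind)
    {
        BasicBlock::MemoryPhiArg* arg = block->bbMemorySsaPhiFunc[memoryKind];
        if ((arg == nullptr) || (arg == BasicBlock::EmptyMemoryPhiDef))
        {
            return; // no phi here, or its argument list has not been filled in yet
        }
        for (; arg != nullptr; arg = arg->m_nextArg)
        {
            printf("memory SSA #%u ", arg->GetSsaNum());
        }
    }
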
@@ -981,8 +1048,8 @@ struct BasicBlock : private LIR::Range
return bbNum - 1;
}
- GenTreeStmt* firstStmt();
- GenTreeStmt* lastStmt();
+ GenTreeStmt* firstStmt() const;
+ GenTreeStmt* lastStmt() const;
GenTreeStmt* lastTopLevelStmt();
GenTree* firstNode();
diff --git a/src/jit/codegen.h b/src/jit/codegen.h
index c6e38ab6af..090283ee50 100755
--- a/src/jit/codegen.h
+++ b/src/jit/codegen.h
@@ -390,6 +390,8 @@ protected:
// Save/Restore callee saved float regs to stack
void genPreserveCalleeSavedFltRegs(unsigned lclFrameSize);
void genRestoreCalleeSavedFltRegs(unsigned lclFrameSize);
+ // Generate VZeroupper instruction to avoid AVX/SSE transition penalty
+ void genVzeroupperIfNeeded(bool check256bitOnly = true);
#endif // _TARGET_XARCH_ && FEATURE_STACK_FP_X87
diff --git a/src/jit/codegenarm.cpp b/src/jit/codegenarm.cpp
index 73e51f2ef7..81f5889e3f 100644
--- a/src/jit/codegenarm.cpp
+++ b/src/jit/codegenarm.cpp
@@ -23,15 +23,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "gcinfo.h"
#include "emit.h"
-#ifndef JIT32_GCENCODER
-#include "gcinfoencoder.h"
-#endif
-
-/*****************************************************************************
- *
- * Generate code that will set the given register to the integer constant.
- */
-
+//------------------------------------------------------------------------
+// genSetRegToIcon: Generate code that will set the given register to the integer constant.
+//
void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFlags flags)
{
// Reg cannot be a FP reg
@@ -42,41 +36,78 @@ void CodeGen::genSetRegToIcon(regNumber reg, ssize_t val, var_types type, insFla
// code path.
noway_assert(type != TYP_REF || val == 0);
- if (val == 0)
- {
- instGen_Set_Reg_To_Zero(emitActualTypeSize(type), reg, flags);
- }
- else
- {
- // TODO-CQ: needs all the optimized cases
- getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(type), reg, val);
- }
+ instGen_Set_Reg_To_Imm(emitActualTypeSize(type), reg, val, flags);
}
-/*****************************************************************************
- *
- * Generate code to check that the GS cookie wasn't thrashed by a buffer
- * overrun. If pushReg is true, preserve all registers around code sequence.
- * Otherwise, ECX maybe modified.
- */
+//------------------------------------------------------------------------
+// genEmitGSCookieCheck: Generate code to check that the GS cookie wasn't thrashed by a buffer overrun.
+//
void CodeGen::genEmitGSCookieCheck(bool pushReg)
{
NYI("ARM genEmitGSCookieCheck");
}
-BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
+//------------------------------------------------------------------------
+// genCallFinally: Generate a call to the finally block.
+//
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
NYI("ARM genCallFinally");
return block;
}
-// move an immediate value into an integer register
-
+//------------------------------------------------------------------------
+// genEHCatchRet: Generate code for an EH catch return (currently NYI for ARM).
void CodeGen::genEHCatchRet(BasicBlock* block)
{
NYI("ARM genEHCatchRet");
}
+//---------------------------------------------------------------------
+// genIntrinsic - generate code for a given intrinsic
+//
+// Arguments
+// treeNode - the GT_INTRINSIC node
+//
+// Return value:
+// None
+//
+void CodeGen::genIntrinsic(GenTreePtr treeNode)
+{
+ // Both operand and its result must be of the same floating point type.
+ GenTreePtr srcNode = treeNode->gtOp.gtOp1;
+ assert(varTypeIsFloating(srcNode));
+ assert(srcNode->TypeGet() == treeNode->TypeGet());
+
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics.
+ //
+ switch (treeNode->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Abs:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_vabs, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ case CORINFO_INTRINSIC_Round:
+ NYI_ARM("genIntrinsic for round - not implemented yet");
+ break;
+
+ case CORINFO_INTRINSIC_Sqrt:
+ genConsumeOperands(treeNode->AsOp());
+ getEmitter()->emitInsBinary(INS_vsqrt, emitTypeSize(treeNode), treeNode, srcNode);
+ break;
+
+ default:
+ assert(!"genIntrinsic: Unsupported intrinsic");
+ unreached();
+ }
+
+ genProduceReg(treeNode);
+}
+
+//------------------------------------------------------------------------
+// instGen_Set_Reg_To_Imm: Move an immediate value into an integer register.
+//
void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm, insFlags flags)
{
// reg cannot be a FP register
@@ -87,23 +118,60 @@ void CodeGen::instGen_Set_Reg_To_Imm(emitAttr size, regNumber reg, ssize_t imm,
size = EA_SIZE(size); // Strip any Reloc flags from size if we aren't doing relocs
}
- if ((imm == 0) && !EA_IS_RELOC(size))
+ if (EA_IS_RELOC(size))
+ {
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm);
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm);
+ }
+ else if (imm == 0)
{
instGen_Set_Reg_To_Zero(size, reg, flags);
}
else
{
- getEmitter()->emitIns_R_I(INS_mov, size, reg, imm);
+ if (arm_Valid_Imm_For_Mov(imm))
+ {
+ getEmitter()->emitIns_R_I(INS_mov, size, reg, imm, flags);
+ }
+ else // We have to use a movw/movt pair of instructions
+ {
+ ssize_t imm_lo16 = (imm & 0xffff);
+ ssize_t imm_hi16 = (imm >> 16) & 0xffff;
+
+ assert(arm_Valid_Imm_For_Mov(imm_lo16));
+ assert(imm_hi16 != 0);
+
+ getEmitter()->emitIns_R_I(INS_movw, size, reg, imm_lo16);
+
+ // If we've got a low register, the high word is all bits set,
+ // and the high bit of the low word is set, we can sign extend
+ // halfword and save two bytes of encoding. This can happen for
+ // small magnitude negative numbers 'n' for -32768 <= n <= -1.
+
+ if (getEmitter()->isLowRegister(reg) && (imm_hi16 == 0xffff) && ((imm_lo16 & 0x8000) == 0x8000))
+ {
+ getEmitter()->emitIns_R_R(INS_sxth, EA_2BYTE, reg, reg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_I(INS_movt, size, reg, imm_hi16);
+ }
+
+ if (flags == INS_FLAGS_SET)
+ getEmitter()->emitIns_R_R(INS_mov, size, reg, reg, INS_FLAGS_SET);
+ }
}
+
regTracker.rsTrackRegIntCns(reg, imm);
}
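
A worked example of the movw/movt split above (illustrative values, not from the diff), loading imm = 0x12345678 into a register:

    ssize_t imm      = 0x12345678;
    ssize_t imm_lo16 = imm & 0xffff;          // 0x5678 -> movw rN, #0x5678
    ssize_t imm_hi16 = (imm >> 16) & 0xffff;  // 0x1234 -> movt rN, #0x1234
    // For a small negative constant such as -2 (0xfffffffe) in a low register,
    // imm_hi16 == 0xffff and bit 15 of imm_lo16 is set, so the code above emits
    // "sxth rN, rN" instead of the movt and saves two bytes of encoding.
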
-/*****************************************************************************
- *
- * Generate code to set a register 'targetReg' of type 'targetType' to the constant
- * specified by the constant (GT_CNS_INT or GT_CNS_DBL) in 'tree'. This does not call
- * genProduceReg() on the target register.
- */
+//------------------------------------------------------------------------
+// genSetRegToConst: Generate code to set a register 'targetReg' of type 'targetType'
+// to the constant specified by the GT_CNS_INT or GT_CNS_DBL node in 'tree'.
+//
+// Notes:
+// This does not call genProduceReg() on the target register.
+//
void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTreePtr tree)
{
switch (tree->gtOper)
@@ -130,7 +198,42 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
case GT_CNS_DBL:
{
- NYI("GT_CNS_DBL");
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+ // TODO-ARM-CQ: Do we have a faster/smaller way to generate 0.0 in thumb2 ISA ?
+ if (targetType == TYP_FLOAT)
+ {
+ // Get a temp integer register
+ regMaskTP tmpRegMask = tree->gtRsvdRegs;
+ regNumber tmpReg = genRegNumFromMask(tmpRegMask);
+ assert(tmpReg != REG_NA);
+
+ float f = forceCastToFloat(constValue);
+ genSetRegToIcon(tmpReg, *((int*)(&f)));
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, EA_4BYTE, targetReg, tmpReg);
+ }
+ else
+ {
+ assert(targetType == TYP_DOUBLE);
+
+ unsigned* cv = (unsigned*)&constValue;
+
+ // Get two temp integer registers
+ regMaskTP tmpRegsMask = tree->gtRsvdRegs;
+ regMaskTP tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+ regNumber tmpReg1 = genRegNumFromMask(tmpRegMask);
+ assert(tmpReg1 != REG_NA);
+
+ tmpRegsMask &= ~genRegMask(tmpReg1); // remove the bit for 'tmpReg1'
+ tmpRegMask = genFindHighestBit(tmpRegsMask); // set tmpRegMsk to a one-bit mask
+ regNumber tmpReg2 = genRegNumFromMask(tmpRegMask);
+ assert(tmpReg2 != REG_NA);
+
+ genSetRegToIcon(tmpReg1, cv[0]);
+ genSetRegToIcon(tmpReg2, cv[1]);
+
+ getEmitter()->emitIns_R_R_R(INS_vmov_i2d, EA_8BYTE, targetReg, tmpReg1, tmpReg2);
+ }
}
break;
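
The GT_CNS_DBL path above materializes a double through two integer temporaries; roughly (a sketch, not emitted code):

    double    constValue = 1.5;
    unsigned* cv         = (unsigned*)&constValue;
    // cv[0] holds the low 32 bits and cv[1] the high 32 bits of the IEEE-754
    // encoding (little-endian ARM); each half is loaded into an integer temp
    // and the pair is transferred to the VFP register with one INS_vmov_i2d.
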
@@ -139,18 +242,22 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
}
}
-/*****************************************************************************
- *
- * Generate code for a single node in the tree.
- * Preconditions: All operands have been evaluated
- *
- */
+//------------------------------------------------------------------------
+// genCodeForTreeNode: Generate code for a single node in the tree.
+//
+// Preconditions:
+// All operands have been evaluated.
+//
void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
{
regNumber targetReg = treeNode->gtRegNum;
var_types targetType = treeNode->TypeGet();
emitter* emit = getEmitter();
+#ifdef DEBUG
+ lastConsumedNode = nullptr;
+#endif
+
JITDUMP("Generating: ");
DISPNODE(treeNode);
@@ -169,10 +276,33 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genProduceReg(treeNode);
break;
- case GT_NEG:
case GT_NOT:
+ assert(!varTypeIsFloating(targetType));
+
+ __fallthrough;
+
+ case GT_NEG:
{
- NYI("GT_NEG and GT_NOT");
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+
+ // The arithmetic node must be sitting in a register (since it's not contained)
+ assert(!treeNode->isContained());
+ // The dst can only be a register.
+ assert(targetReg != REG_NA);
+
+ GenTreePtr operand = treeNode->gtGetOp1();
+ assert(!operand->isContained());
+ // The src must be a register.
+ regNumber operandReg = genConsumeReg(operand);
+
+ if (ins == INS_vneg)
+ {
+ getEmitter()->emitIns_R_R(ins, emitTypeSize(treeNode), targetReg, operandReg);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_R_I(ins, emitTypeSize(treeNode), targetReg, operandReg, 0);
+ }
}
genProduceReg(treeNode);
break;
@@ -185,9 +315,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_ADD:
case GT_SUB:
+ case GT_MUL:
{
const genTreeOps oper = treeNode->OperGet();
- if ((oper == GT_ADD || oper == GT_SUB) && treeNode->gtOverflow())
+ if ((oper == GT_ADD || oper == GT_SUB || oper == GT_MUL) && treeNode->gtOverflow())
{
// This is also checked in the importer.
NYI("Overflow not yet implemented");
@@ -209,40 +340,47 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
genConsumeIfReg(op1);
genConsumeIfReg(op2);
- // This is the case of reg1 = reg1 op reg2
- // We're ready to emit the instruction without any moves
- if (op1reg == targetReg)
+ if (!varTypeIsFloating(targetType))
{
- dst = op1;
- src = op2;
- }
- // We have reg1 = reg2 op reg1
- // In order for this operation to be correct
- // we need that op is a commutative operation so
- // we can convert it into reg1 = reg1 op reg2 and emit
- // the same code as above
- else if (op2reg == targetReg)
- {
- noway_assert(GenTree::OperIsCommutative(treeNode->OperGet()));
- dst = op2;
- src = op1;
+ // This is the case of reg1 = reg1 op reg2
+ // We're ready to emit the instruction without any moves
+ if (op1reg == targetReg)
+ {
+ dst = op1;
+ src = op2;
+ }
+ // We have reg1 = reg2 op reg1
+ // In order for this operation to be correct
+ // we need that op is a commutative operation so
+ // we can convert it into reg1 = reg1 op reg2 and emit
+ // the same code as above
+ else if (op2reg == targetReg)
+ {
+ assert(GenTree::OperIsCommutative(treeNode->OperGet()));
+ dst = op2;
+ src = op1;
+ }
+ // dest, op1 and op2 registers are different:
+ // reg3 = reg1 op reg2
+ // We can implement this by issuing a mov:
+ // reg3 = reg1
+ // reg3 = reg3 op reg2
+ else
+ {
+ inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
+ regTracker.rsTrackRegCopy(targetReg, op1reg);
+ gcInfo.gcMarkRegPtrVal(targetReg, targetType);
+ dst = treeNode;
+ src = op2;
+ }
+
+ regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
+ assert(r == targetReg);
}
- // dest, op1 and op2 registers are different:
- // reg3 = reg1 op reg2
- // We can implement this by issuing a mov:
- // reg3 = reg1
- // reg3 = reg3 op reg2
else
{
- inst_RV_RV(ins_Move_Extend(targetType, true), targetReg, op1reg, op1->gtType);
- regTracker.rsTrackRegCopy(targetReg, op1reg);
- gcInfo.gcMarkRegPtrVal(targetReg, targetType);
- dst = treeNode;
- src = op2;
+ emit->emitIns_R_R_R(ins, emitTypeSize(treeNode), targetReg, op1reg, op2reg);
}
-
- regNumber r = emit->emitInsBinary(ins, emitTypeSize(treeNode), dst, src);
- noway_assert(r == targetReg);
}
genProduceReg(treeNode);
break;
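
A worked example of the integer path above (registers illustrative): computing r3 = r1 - r2 when neither source occupies the target register.

    // mov r3, r1        ; copy op1 into the target first
    // sub r3, r3, r2    ; then emit the two-operand form
    //
    // If instead the target were r2 (r2 = r1 - r2), swapping the operands would
    // be wrong for the non-commutative SUB, which is why the op2reg == targetReg
    // case asserts OperIsCommutative. Floating-point operations bypass all of
    // this and use the three-operand emitIns_R_R_R form directly.
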
@@ -429,17 +567,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_IND:
+ genConsumeAddress(treeNode->AsIndir()->Addr());
emit->emitInsMov(ins_Load(treeNode->TypeGet()), emitTypeSize(treeNode), treeNode);
genProduceReg(treeNode);
break;
- case GT_MUL:
- {
- NYI("GT_MUL");
- }
- genProduceReg(treeNode);
- break;
-
case GT_MOD:
case GT_UDIV:
case GT_UMOD:
@@ -451,17 +583,45 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_DIV:
{
- NYI("GT_DIV");
- }
+ genConsumeOperands(treeNode->AsOp());
+
+ noway_assert(targetReg != REG_NA);
+
+ GenTreePtr dst = treeNode;
+ GenTreePtr src1 = treeNode->gtGetOp1();
+ GenTreePtr src2 = treeNode->gtGetOp2();
+ instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+ emitAttr attr = emitTypeSize(treeNode);
+ regNumber result = REG_NA;
+
+ // dst can only be a reg
+ assert(!dst->isContained());
+
+ // at most one of the sources can be contained
+ assert(!src1->isContained() || !src2->isContained());
+
+ if (varTypeIsFloating(targetType))
+ {
+ // Floating point divide never raises an exception
+
+ emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+ else // a signed integer divide operation
+ {
+ // TODO-ARM-Bug: handle zero division exception.
+
+ emit->emitIns_R_R_R(ins, attr, dst->gtRegNum, src1->gtRegNum, src2->gtRegNum);
+ }
+
genProduceReg(treeNode);
- break;
+ }
+ break;
case GT_INTRINSIC:
{
- NYI("GT_INTRINSIC");
+ genIntrinsic(treeNode);
}
- genProduceReg(treeNode);
- break;
+ break;
case GT_EQ:
case GT_NE:
@@ -485,26 +645,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
emitAttr cmpAttr;
if (varTypeIsFloating(op1))
{
- NYI("Floating point compare");
-
- bool isUnordered = ((treeNode->gtFlags & GTF_RELOP_NAN_UN) != 0);
- switch (tree->OperGet())
- {
- case GT_EQ:
- ins = INS_beq;
- case GT_NE:
- ins = INS_bne;
- case GT_LT:
- ins = isUnordered ? INS_blt : INS_blo;
- case GT_LE:
- ins = isUnordered ? INS_ble : INS_bls;
- case GT_GE:
- ins = isUnordered ? INS_bpl : INS_bge;
- case GT_GT:
- ins = isUnordered ? INS_bhi : INS_bgt;
- default:
- unreached();
- }
+ assert(op1->TypeGet() == op2->TypeGet());
+ ins = INS_vcmp;
+ cmpAttr = emitTypeSize(op1->TypeGet());
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
+ // vmrs with register 0xf has special meaning of transferring flags
+ emit->emitIns_R(INS_vmrs, EA_4BYTE, REG_R15);
}
else
{
@@ -522,12 +668,12 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
bool op1Is64Bit = (varTypeIsLong(op1Type) || op1Type == TYP_REF);
bool op2Is64Bit = (varTypeIsLong(op2Type) || op2Type == TYP_REF);
NYI_IF(op1Is64Bit || op2Is64Bit, "Long compare");
- assert(!op1->isContainedMemoryOp() || op1Type == op2Type);
- assert(!op2->isContainedMemoryOp() || op1Type == op2Type);
+ assert(!op1->isUsedFromMemory() || op1Type == op2Type);
+ assert(!op2->isUsedFromMemory() || op1Type == op2Type);
cmpAttr = emitTypeSize(cmpType);
}
+ emit->emitInsBinary(ins, cmpAttr, op1, op2);
}
- emit->emitInsBinary(ins, cmpAttr, op1, op2);
// Are we evaluating this into a register?
if (targetReg != REG_NA)
@@ -579,7 +725,68 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_STOREIND:
{
- NYI("GT_STOREIND");
+ GenTreeStoreInd* storeInd = treeNode->AsStoreInd();
+ GenTree* data = storeInd->Data();
+ GenTree* addr = storeInd->Addr();
+ var_types targetType = storeInd->TypeGet();
+
+ assert(!varTypeIsFloating(targetType) || (targetType == data->TypeGet()));
+
+ GCInfo::WriteBarrierForm writeBarrierForm = gcInfo.gcIsWriteBarrierCandidate(treeNode, data);
+ if (writeBarrierForm != GCInfo::WBF_NoBarrier)
+ {
+ // data and addr must be in registers.
+ // Consume both registers so that any copies of interfering
+ // registers are taken care of.
+ genConsumeOperands(storeInd->AsOp());
+
+#if NOGC_WRITE_BARRIERS
+ NYI_ARM("NOGC_WRITE_BARRIERS");
+#else
+ // At this point, we should not have any interference.
+ // That is, 'data' must not be in REG_ARG_0,
+ // as that is where 'addr' must go.
+ noway_assert(data->gtRegNum != REG_ARG_0);
+
+ // addr goes in REG_ARG_0
+ if (addr->gtRegNum != REG_ARG_0)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_0, addr->gtRegNum, addr->TypeGet());
+ }
+
+ // data goes in REG_ARG_1
+ if (data->gtRegNum != REG_ARG_1)
+ {
+ inst_RV_RV(INS_mov, REG_ARG_1, data->gtRegNum, data->TypeGet());
+ }
+#endif // NOGC_WRITE_BARRIERS
+
+ genGCWriteBarrier(storeInd, writeBarrierForm);
+ }
+ else // A normal store, not a WriteBarrier store
+ {
+ bool reverseOps = ((storeInd->gtFlags & GTF_REVERSE_OPS) != 0);
+ bool dataIsUnary = false;
+
+ // We must consume the operands in the proper execution order,
+ // so that liveness is updated appropriately.
+ if (!reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ if (!data->isContained())
+ {
+ genConsumeRegs(data);
+ }
+
+ if (reverseOps)
+ {
+ genConsumeAddress(addr);
+ }
+
+ emit->emitInsMov(ins_Store(data->TypeGet()), emitTypeSize(storeInd), storeInd);
+ }
}
break;
@@ -682,7 +889,14 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
break;
case GT_NO_OP:
- NYI("GT_NO_OP");
+ if (treeNode->gtFlags & GTF_NO_OP_NO)
+ {
+ noway_assert(!"GTF_NO_OP_NO should not be set");
+ }
+ else
+ {
+ instGen(INS_nop);
+ }
break;
case GT_ARR_BOUNDS_CHECK:
@@ -733,13 +947,22 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
emit->emitIns_R_L(INS_lea, EA_PTRSIZE, genPendingCallLabel, treeNode->gtRegNum);
break;
+ case GT_CLS_VAR_ADDR:
+ emit->emitIns_R_C(INS_lea, EA_PTRSIZE, targetReg, treeNode->gtClsVar.gtClsVarHnd, 0);
+ genProduceReg(treeNode);
+ break;
+
+ case GT_IL_OFFSET:
+ // Do nothing; these nodes are simply markers for debug info.
+ break;
+
default:
{
#ifdef DEBUG
char message[256];
_snprintf_s(message, _countof(message), _TRUNCATE, "NYI: Unimplemented node type %s\n",
GenTree::NodeName(treeNode->OperGet()));
- notYetImplemented(message, __FILE__, __LINE__);
+ NYIRAW(message);
#else
NYI("unimplemented node");
#endif
@@ -748,24 +971,33 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
}
-// generate code for the locked operations:
-// GT_LOCKADD, GT_XCHG, GT_XADD
+//------------------------------------------------------------------------
+// genLockedInstructions: Generate code for the locked operations.
+//
+// Notes:
+// Handles GT_LOCKADD, GT_XCHG, GT_XADD nodes.
+//
void CodeGen::genLockedInstructions(GenTreeOp* treeNode)
{
NYI("genLockedInstructions");
}
-// generate code for GT_ARR_BOUNDS_CHECK node
+//------------------------------------------------------------------------
+// genRangeCheck: generate code for GT_ARR_BOUNDS_CHECK node.
+//
void CodeGen::genRangeCheck(GenTreePtr oper)
{
noway_assert(oper->OperGet() == GT_ARR_BOUNDS_CHECK);
GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
- GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
GenTreePtr arrIdx = bndsChk->gtIndex->gtEffectiveVal();
+ GenTreePtr arrLen = bndsChk->gtArrLen->gtEffectiveVal();
GenTreePtr arrRef = NULL;
int lenOffset = 0;
+ genConsumeIfReg(arrIdx);
+ genConsumeIfReg(arrLen);
+
GenTree * src1, *src2;
emitJumpKind jmpKind;
@@ -784,15 +1016,13 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
jmpKind = genJumpKindForOper(GT_GE, CK_UNSIGNED);
}
- genConsumeIfReg(src1);
- genConsumeIfReg(src2);
-
getEmitter()->emitInsBinary(INS_cmp, emitAttr(TYP_INT), src1, src2);
genJumpToThrowHlpBlk(jmpKind, SCK_RNGCHK_FAIL, bndsChk->gtIndRngFailBB);
}
-// make a temporary indir we can feed to pattern matching routines
-// in cases where we don't want to instantiate all the indirs that happen
+//------------------------------------------------------------------------
+// indirForm: Make a temporary indir we can feed to pattern matching routines
+// in cases where we don't want to instantiate all the indirs that happen.
//
GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
{
@@ -804,8 +1034,9 @@ GenTreeIndir CodeGen::indirForm(var_types type, GenTree* base)
return i;
}
-// make a temporary int we can feed to pattern matching routines
-// in cases where we don't want to instantiate
+//------------------------------------------------------------------------
+// intForm: Make a temporary int we can feed to pattern matching routines
+// in cases where we don't want to instantiate.
//
GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
{
@@ -817,6 +1048,9 @@ GenTreeIntCon CodeGen::intForm(var_types type, ssize_t value)
return i;
}
+//------------------------------------------------------------------------
+// genGetInsForOper: Return instruction encoding of the operation tree.
+//
instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
{
instruction ins;
@@ -835,6 +1069,9 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
case GT_MUL:
ins = INS_MUL;
break;
+ case GT_DIV:
+ ins = INS_sdiv;
+ break;
case GT_LSH:
ins = INS_SHIFT_LEFT_LOGICAL;
break;
@@ -878,21 +1115,331 @@ instruction CodeGen::genGetInsForOper(genTreeOps oper, var_types type)
//
void CodeGen::genCodeForShift(GenTreePtr tree)
{
- NYI("genCodeForShift");
+ var_types targetType = tree->TypeGet();
+ genTreeOps oper = tree->OperGet();
+ instruction ins = genGetInsForOper(oper, targetType);
+ emitAttr size = emitTypeSize(tree);
+
+ assert(tree->gtRegNum != REG_NA);
+
+ GenTreePtr operand = tree->gtGetOp1();
+ genConsumeReg(operand);
+
+ GenTreePtr shiftBy = tree->gtGetOp2();
+ if (!shiftBy->IsCnsIntOrI())
+ {
+ genConsumeReg(shiftBy);
+ getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
+ }
+ else
+ {
+ unsigned immWidth = size * BITS_PER_BYTE;
+ ssize_t shiftByImm = shiftBy->gtIntCon.gtIconVal & (immWidth - 1);
+
+ getEmitter()->emitIns_R_R_I(ins, size, tree->gtRegNum, operand->gtRegNum, shiftByImm);
+ }
+
+ genProduceReg(tree);
}
+//------------------------------------------------------------------------
+// genRegCopy: Generate a register copy.
+//
void CodeGen::genRegCopy(GenTree* treeNode)
{
NYI("genRegCopy");
}
-// Produce code for a GT_CALL node
+//------------------------------------------------------------------------
+// genCallInstruction: Produce code for a GT_CALL node
+//
void CodeGen::genCallInstruction(GenTreePtr node)
{
- NYI("Call not implemented");
+ GenTreeCall* call = node->AsCall();
+
+ assert(call->gtOper == GT_CALL);
+
+ gtCallTypes callType = (gtCallTypes)call->gtCallType;
+
+ IL_OFFSETX ilOffset = BAD_IL_OFFSET;
+
+ // all virtuals should have been expanded into a control expression
+ assert(!call->IsVirtual() || call->gtControlExpr || call->gtCallAddr);
+
+ // Consume all the arg regs
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode->gtSkipReloadOrCopy());
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ continue;
+
+ // Deal with multi register passed struct args.
+ if (argNode->OperGet() == GT_FIELD_LIST)
+ {
+ GenTreeArgList* argListPtr = argNode->AsArgList();
+ unsigned iterationNum = 0;
+ regNumber argReg = curArgTabEntry->regNum;
+ for (; argListPtr != nullptr; argListPtr = argListPtr->Rest(), iterationNum++)
+ {
+ GenTreePtr putArgRegNode = argListPtr->gtOp.gtOp1;
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ genConsumeReg(putArgRegNode);
+
+ if (putArgRegNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(putArgRegNode->TypeGet(), putArgRegNode->InReg()), argReg,
+ putArgRegNode->gtRegNum);
+ }
+
+ argReg = genRegArgNext(argReg);
+ }
+ }
+ else
+ {
+ regNumber argReg = curArgTabEntry->regNum;
+ genConsumeReg(argNode);
+ if (argNode->gtRegNum != argReg)
+ {
+ inst_RV_RV(ins_Move_Extend(argNode->TypeGet(), argNode->InReg()), argReg, argNode->gtRegNum);
+ }
+ }
+
+ // In the case of a varargs call,
+ // the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the
+ // integer and floating point registers so, let's do that.
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ NYI_ARM("CodeGen - IsVarargs");
+ }
+ }
+
+ // Insert a null check on "this" pointer if asked.
+ if (call->NeedsNullCheck())
+ {
+ const regNumber regThis = genGetThisArgReg(call);
+ const regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs);
+ getEmitter()->emitIns_R_R_I(INS_ldr, EA_4BYTE, tmpReg, regThis, 0);
+ }
+
+ // Either gtControlExpr != null or gtCallAddr != null or it is a direct non-virtual call to a user or helper method.
+ CORINFO_METHOD_HANDLE methHnd;
+ GenTree* target = call->gtControlExpr;
+ if (callType == CT_INDIRECT)
+ {
+ assert(target == nullptr);
+ target = call->gtCall.gtCallAddr;
+ methHnd = nullptr;
+ }
+ else
+ {
+ methHnd = call->gtCallMethHnd;
+ }
+
+ CORINFO_SIG_INFO* sigInfo = nullptr;
+#ifdef DEBUG
+ // Pass the call signature information down into the emitter so the emitter can associate
+ // native call sites with the signatures they were generated from.
+ if (callType != CT_HELPER)
+ {
+ sigInfo = call->callSig;
+ }
+#endif // DEBUG
+
+ // If fast tail call, then we are done.
+ if (call->IsFastTailCall())
+ {
+ NYI_ARM("fast tail call");
+ }
+
+ // For a pinvoke to unmanaged code we emit a label to clear
+ // the GC pointer state before the callsite.
+ // We can't utilize the typical lazy killing of GC pointers
+ // at (or inside) the callsite.
+ if (call->IsUnmanaged())
+ {
+ genDefineTempLabel(genCreateTempLabel());
+ }
+
+ // Determine return value size(s).
+ ReturnTypeDesc* pRetTypeDesc = call->GetReturnTypeDesc();
+ emitAttr retSize = EA_PTRSIZE;
+
+ if (call->HasMultiRegRetVal())
+ {
+ NYI_ARM("has multi reg ret val");
+ }
+ else
+ {
+ assert(!varTypeIsStruct(call));
+
+ if (call->gtType == TYP_REF || call->gtType == TYP_ARRAY)
+ {
+ retSize = EA_GCREF;
+ }
+ else if (call->gtType == TYP_BYREF)
+ {
+ retSize = EA_BYREF;
+ }
+ }
+
+ // We need to propagate the IL offset information to the call instruction, so we can emit
+ // an IL to native mapping record for the call, to support managed return value debugging.
+ // We don't want tail call helper calls that were converted from normal calls to get a record,
+ // so we skip this hash table lookup logic in that case.
+ if (compiler->opts.compDbgInfo && compiler->genCallSite2ILOffsetMap != nullptr && !call->IsTailCall())
+ {
+ (void)compiler->genCallSite2ILOffsetMap->Lookup(call, &ilOffset);
+ }
+
+ if (target != nullptr)
+ {
+ // For ARM a call target can not be a contained indirection
+ assert(!target->isContainedIndir());
+
+ // We have already generated code for gtControlExpr evaluating it into a register.
+ // We just need to emit "call reg" in this case.
+ //
+ assert(genIsValidIntReg(target->gtRegNum));
+
+ genEmitCall(emitter::EC_INDIR_R, methHnd,
+ INDEBUG_LDISASM_COMMA(sigInfo) nullptr, // addr
+ retSize, ilOffset, target->gtRegNum);
+ }
+ else
+ {
+ // Generate a direct call to a non-virtual user defined or helper method
+ assert(callType == CT_HELPER || callType == CT_USER_FUNC);
+
+ void* addr = nullptr;
+ if (callType == CT_HELPER)
+ {
+ // Direct call to a helper method.
+ CorInfoHelpFunc helperNum = compiler->eeGetHelperNum(methHnd);
+ noway_assert(helperNum != CORINFO_HELP_UNDEF);
+
+ void* pAddr = nullptr;
+ addr = compiler->compGetHelperFtn(helperNum, (void**)&pAddr);
+
+ if (addr == nullptr)
+ {
+ addr = pAddr;
+ }
+ }
+ else
+ {
+ // Direct call to a non-virtual user function.
+ CORINFO_ACCESS_FLAGS aflags = CORINFO_ACCESS_ANY;
+ if (call->IsSameThis())
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_THIS);
+ }
+
+ if ((call->NeedsNullCheck()) == 0)
+ {
+ aflags = (CORINFO_ACCESS_FLAGS)(aflags | CORINFO_ACCESS_NONNULL);
+ }
+
+ CORINFO_CONST_LOOKUP addrInfo;
+ compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &addrInfo, aflags);
+
+ addr = addrInfo.addr;
+ }
+
+ assert(addr);
+ // Non-virtual direct call to known addresses
+ if (!arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ regNumber tmpReg = genRegNumFromMask(node->gtRsvdRegs);
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, tmpReg, (ssize_t)addr);
+ genEmitCall(emitter::EC_INDIR_R, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) NULL, retSize, ilOffset, tmpReg);
+ }
+ else
+ {
+ genEmitCall(emitter::EC_FUNC_TOKEN, methHnd, INDEBUG_LDISASM_COMMA(sigInfo) addr, retSize, ilOffset);
+ }
+ }
+
+ // if it was a pinvoke we may have needed to get the address of a label
+ if (genPendingCallLabel)
+ {
+ assert(call->IsUnmanaged());
+ genDefineTempLabel(genPendingCallLabel);
+ genPendingCallLabel = nullptr;
+ }
+
+ // Update GC info:
+ // All Callee arg registers are trashed and no longer contain any GC pointers.
+ // TODO-ARM-Bug?: As a matter of fact shouldn't we be killing all of callee trashed regs here?
+ // For now we will assert that other than arg regs gc ref/byref set doesn't contain any other
+ // registers from RBM_CALLEE_TRASH
+ assert((gcInfo.gcRegGCrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ assert((gcInfo.gcRegByrefSetCur & (RBM_CALLEE_TRASH & ~RBM_ARG_REGS)) == 0);
+ gcInfo.gcRegGCrefSetCur &= ~RBM_ARG_REGS;
+ gcInfo.gcRegByrefSetCur &= ~RBM_ARG_REGS;
+
+ var_types returnType = call->TypeGet();
+ if (returnType != TYP_VOID)
+ {
+ regNumber returnReg;
+
+ if (call->HasMultiRegRetVal())
+ {
+ assert(pRetTypeDesc != nullptr);
+ unsigned regCount = pRetTypeDesc->GetReturnRegCount();
+
+ // If regs allocated to call node are different from ABI return
+ // regs in which the call has returned its result, move the result
+ // to regs allocated to call node.
+ for (unsigned i = 0; i < regCount; ++i)
+ {
+ var_types regType = pRetTypeDesc->GetReturnRegType(i);
+ returnReg = pRetTypeDesc->GetABIReturnReg(i);
+ regNumber allocatedReg = call->GetRegNumByIdx(i);
+ if (returnReg != allocatedReg)
+ {
+ inst_RV_RV(ins_Copy(regType), allocatedReg, returnReg, regType);
+ }
+ }
+ }
+ else
+ {
+ if (varTypeIsFloating(returnType))
+ {
+ returnReg = REG_FLOATRET;
+ }
+ else
+ {
+ returnReg = REG_INTRET;
+ }
+
+ if (call->gtRegNum != returnReg)
+ {
+ inst_RV_RV(ins_Copy(returnType), call->gtRegNum, returnReg, returnType);
+ }
+ }
+
+ genProduceReg(call);
+ }
+
+ // If there is nothing next, that means the result is thrown away, so this value is not live.
+ // However, for minopts or debuggable code, we keep it live to support managed return value debugging.
+ if ((call->gtNext == nullptr) && !compiler->opts.MinOpts() && !compiler->opts.compDbgCode)
+ {
+ gcInfo.gcMarkRegSetNpt(RBM_INTRET);
+ }
}
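
When the call target address is not reachable with a BL immediate, the fallback above amounts to something like the following (register and address illustrative):

    // movw r12, #<addr low 16 bits>    ; instGen_Set_Reg_To_Imm with EA_HANDLE_CNS_RELOC
    // movt r12, #<addr high 16 bits>
    // blx  r12                         ; genEmitCall with EC_INDIR_R through the reserved register
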
-// produce code for a GT_LEA subnode
+//------------------------------------------------------------------------
+// genLeaInstruction: Produce code for a GT_LEA subnode.
+//
void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
{
if (lea->Base() && lea->Index())
@@ -909,12 +1456,44 @@ void CodeGen::genLeaInstruction(GenTreeAddrMode* lea)
genProduceReg(lea);
}
-// Generate code to materialize a condition into a register
-// (the condition codes must already have been appropriately set)
-
+//------------------------------------------------------------------------
+// genSetRegToCond: Generate code to materialize a condition into a register.
+//
+// Arguments:
+// dstReg - The target register to set to 1 or 0
+// tree - The GenTree Relop node that was used to set the Condition codes
+//
+// Return Value: none
+//
+// Preconditions:
+// The condition codes must already have been appropriately set.
+//
void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
{
- NYI("genSetRegToCond");
+ // Emit code like this:
+ // ...
+ // bgt True
+ // movs rD, #0
+ // b Next
+ // True:
+ // movs rD, #1
+ // Next:
+ // ...
+
+ CompareKind compareKind = ((tree->gtFlags & GTF_UNSIGNED) != 0) ? CK_UNSIGNED : CK_SIGNED;
+ emitJumpKind jmpKind = genJumpKindForOper(tree->gtOper, compareKind);
+
+ BasicBlock* labelTrue = genCreateTempLabel();
+ getEmitter()->emitIns_J(emitter::emitJumpKindToIns(jmpKind), labelTrue);
+
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(tree->gtType), dstReg, 0);
+
+ BasicBlock* labelNext = genCreateTempLabel();
+ getEmitter()->emitIns_J(INS_b, labelNext);
+
+ genDefineTempLabel(labelTrue);
+ getEmitter()->emitIns_R_I(INS_mov, emitActualTypeSize(tree->gtType), dstReg, 1);
+ genDefineTempLabel(labelNext);
}
//------------------------------------------------------------------------
@@ -933,7 +1512,85 @@ void CodeGen::genSetRegToCond(regNumber dstReg, GenTreePtr tree)
//
void CodeGen::genIntToIntCast(GenTreePtr treeNode)
{
- NYI("Cast");
+ assert(treeNode->OperGet() == GT_CAST);
+
+ GenTreePtr castOp = treeNode->gtCast.CastOp();
+ emitter* emit = getEmitter();
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = genActualType(castOp->TypeGet());
+ emitAttr movSize = emitActualTypeSize(dstType);
+ bool movRequired = false;
+
+ regNumber targetReg = treeNode->gtRegNum;
+ regNumber sourceReg = castOp->gtRegNum;
+
+ // For Long to Int conversion we will have a reserved integer register to hold the immediate mask
+ regNumber tmpReg = (treeNode->gtRsvdRegs == RBM_NONE) ? REG_NA : genRegNumFromMask(treeNode->gtRsvdRegs);
+
+ assert(genIsValidIntReg(targetReg));
+ assert(genIsValidIntReg(sourceReg));
+
+ instruction ins = INS_invalid;
+
+ genConsumeReg(castOp);
+ Lowering::CastInfo castInfo;
+
+ // Get information about the cast.
+ Lowering::getCastDescription(treeNode, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ NYI_ARM("CodeGen::genIntToIntCast for OverflowCheck");
+ }
+ else // Non-overflow checking cast.
+ {
+ if (genTypeSize(srcType) == genTypeSize(dstType))
+ {
+ ins = INS_mov;
+ }
+ else
+ {
+ var_types extendType = TYP_UNKNOWN;
+
+ // If we need to treat a signed type as unsigned
+ if ((treeNode->gtFlags & GTF_UNSIGNED) != 0)
+ {
+ extendType = genUnsignedType(srcType);
+ movSize = emitTypeSize(extendType);
+ movRequired = true;
+ }
+ else
+ {
+ if (genTypeSize(srcType) < genTypeSize(dstType))
+ {
+ extendType = srcType;
+ movSize = emitTypeSize(srcType);
+ if (srcType == TYP_UINT)
+ {
+ movRequired = true;
+ }
+ }
+ else // (genTypeSize(srcType) > genTypeSize(dstType))
+ {
+ extendType = dstType;
+ movSize = emitTypeSize(dstType);
+ }
+ }
+
+ ins = ins_Move_Extend(extendType, castOp->InReg());
+ }
+ }
+
+ // We should never be generating a load from memory instruction here!
+ assert(!emit->emitInsIsLoad(ins));
+
+ if ((ins != INS_mov) || movRequired || (targetReg != sourceReg))
+ {
+ emit->emitIns_R_R(ins, movSize, targetReg, sourceReg);
+ }
+
+ genProduceReg(treeNode);
}
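
A worked example of the non-overflow path above (illustrative): a GT_CAST from TYP_INT down to TYP_SHORT.

    // srcType is larger than dstType, so extendType = dstType = TYP_SHORT and
    // movSize = EA_2BYTE; ins_Move_Extend then yields a sign-extending move, e.g.
    //     sxth rDst, rSrc
    // For an unsigned source (GTF_UNSIGNED), the extend type is forced to its
    // unsigned flavor and a move is required even when the registers match.
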
//------------------------------------------------------------------------
@@ -952,7 +1609,39 @@ void CodeGen::genIntToIntCast(GenTreePtr treeNode)
//
void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
{
- NYI("Cast");
+ // float <--> double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ genConsumeOperands(treeNode->AsOp());
+
+ // treeNode must be a reg
+ assert(!treeNode->isContained());
+
+ if (srcType != dstType)
+ {
+ instruction insVcvt = (srcType == TYP_FLOAT) ? INS_vcvt_f2d // convert Float to Double
+ : INS_vcvt_d2f; // convert Double to Float
+
+ getEmitter()->emitIns_R_R(insVcvt, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+ else if (treeNode->gtRegNum != op1->gtRegNum)
+ {
+ getEmitter()->emitIns_R_R(INS_vmov, emitTypeSize(treeNode), treeNode->gtRegNum, op1->gtRegNum);
+ }
+
+ genProduceReg(treeNode);
}
//------------------------------------------------------------------------
@@ -971,7 +1660,69 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
//
void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
{
- NYI("Cast");
+ // int --> float/double conversions are always non-overflow ones
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
+
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidFloatReg(targetReg));
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidIntReg(op1->gtRegNum)); // Must be a valid int reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(!varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
+
+ // force the srcType to unsigned if GT_UNSIGNED flag is set
+ if (treeNode->gtFlags & GTF_UNSIGNED)
+ {
+ srcType = genUnsignedType(srcType);
+ }
+
+ // We should never see a srcType whose size is neither EA_4BYTE nor EA_8BYTE
+ // For conversions from small types (byte/sbyte/int16/uint16) to float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr srcSize = EA_ATTR(genTypeSize(srcType));
+ noway_assert((srcSize == EA_4BYTE) || (srcSize == EA_8BYTE));
+
+ instruction insVcvt = INS_invalid;
+
+ if (dstType == TYP_DOUBLE)
+ {
+ if (srcSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(srcType)) ? INS_vcvt_u2d : INS_vcvt_i2d;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ NYI_ARM("Casting int64/uint64 to double in genIntToFloatCast");
+ }
+ }
+ else
+ {
+ assert(dstType == TYP_FLOAT);
+ if (srcSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(srcType)) ? INS_vcvt_u2f : INS_vcvt_i2f;
+ }
+ else
+ {
+ assert(srcSize == EA_8BYTE);
+ NYI_ARM("Casting int64/uint64 to float in genIntToFloatCast");
+ }
+ }
+
+ genConsumeOperands(treeNode->AsOp());
+
+ assert(insVcvt != INS_invalid);
+ getEmitter()->emitIns_R_R(INS_vmov_i2f, srcSize, treeNode->gtRegNum, op1->gtRegNum);
+ getEmitter()->emitIns_R_R(insVcvt, srcSize, treeNode->gtRegNum, treeNode->gtRegNum);
+
+ genProduceReg(treeNode);
}
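
Roughly the two-instruction sequence genIntToFloatCast produces for a 32-bit signed int to float conversion (registers illustrative):

    // vmov         s0, r0    ; INS_vmov_i2f: move the raw int bits into a VFP register
    // vcvt.f32.s32 s0, s0    ; INS_vcvt_i2f: convert in place
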
//------------------------------------------------------------------------
@@ -990,31 +1741,72 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
//
void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
{
- NYI("Cast");
-}
+ // we don't expect to see overflow detecting float/double --> int type conversions here
+ // as they should have been converted into helper calls by front-end.
+ assert(treeNode->OperGet() == GT_CAST);
+ assert(!treeNode->gtOverflow());
-/*****************************************************************************
- *
- * Create and record GC Info for the function.
- */
-#ifdef JIT32_GCENCODER
-void*
-#else
-void
-#endif
-CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
-{
-#ifdef JIT32_GCENCODER
- return genCreateAndStoreGCInfoJIT32(codeSize, prologSize, epilogSize DEBUGARG(codePtr));
-#else
- genCreateAndStoreGCInfoX64(codeSize, prologSize DEBUGARG(codePtr));
-#endif
-}
+ regNumber targetReg = treeNode->gtRegNum;
+ assert(genIsValidIntReg(targetReg)); // Must be a valid int reg.
+
+ GenTreePtr op1 = treeNode->gtOp.gtOp1;
+ assert(!op1->isContained()); // Cannot be contained
+ assert(genIsValidFloatReg(op1->gtRegNum)); // Must be a valid float reg.
+
+ var_types dstType = treeNode->CastToType();
+ var_types srcType = op1->TypeGet();
+ assert(varTypeIsFloating(srcType) && !varTypeIsFloating(dstType));
+
+ // We should never see a dstType whose size is neither EA_4BYTE nor EA_8BYTE
+ // For conversions to small types (byte/sbyte/int16/uint16) from float/double,
+ // we expect the front-end or lowering phase to have generated two levels of cast.
+ //
+ emitAttr dstSize = EA_ATTR(genTypeSize(dstType));
+ noway_assert((dstSize == EA_4BYTE) || (dstSize == EA_8BYTE));
+
+ instruction insVcvt = INS_invalid;
+
+ if (srcType == TYP_DOUBLE)
+ {
+ if (dstSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(dstType)) ? INS_vcvt_d2u : INS_vcvt_d2i;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ NYI_ARM("Casting double to int64/uint64 in genIntToFloatCast");
+ }
+ }
+ else
+ {
+ assert(srcType == TYP_FLOAT);
+ if (dstSize == EA_4BYTE)
+ {
+ insVcvt = (varTypeIsUnsigned(dstType)) ? INS_vcvt_f2u : INS_vcvt_f2i;
+ }
+ else
+ {
+ assert(dstSize == EA_8BYTE);
+ NYI_ARM("Casting float to int64/uint64 in genIntToFloatCast");
+ }
+ }
-// TODO-ARM-Cleanup: It seems that the ARM JIT (classic and otherwise) uses this method, so it seems to be
-// inappropriately named?
+ genConsumeOperands(treeNode->AsOp());
+
+ assert(insVcvt != INS_invalid);
+ getEmitter()->emitIns_R_R(insVcvt, dstSize, op1->gtRegNum, op1->gtRegNum);
+ getEmitter()->emitIns_R_R(INS_vmov_f2i, dstSize, treeNode->gtRegNum, op1->gtRegNum);
+
+ genProduceReg(treeNode);
+}
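
And the mirror-image sequence for the float-to-int direction above (registers illustrative):

    // vcvt.s32.f32 s0, s0    ; INS_vcvt_f2i: convert in the source VFP register
    // vmov         r0, s0    ; INS_vmov_f2i: move the integer result to the target int register
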
-void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize DEBUGARG(void* codePtr))
+//------------------------------------------------------------------------
+// genCreateAndStoreGCInfo: Create and record GC Info for the function.
+//
+void CodeGen::genCreateAndStoreGCInfo(unsigned codeSize,
+ unsigned prologSize,
+ unsigned epilogSize DEBUGARG(void* codePtr))
{
IAllocator* allowZeroAlloc = new (compiler, CMK_GC) AllowZeroAllocator(compiler->getAllocatorGC());
GcInfoEncoder* gcInfoEncoder = new (compiler, CMK_GC)
@@ -1039,20 +1831,73 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
compiler->compInfoBlkSize = 0; // not exposed by the GCEncoder interface
}
-/*****************************************************************************
- * Emit a call to a helper function.
- */
-
-void CodeGen::genEmitHelperCall(unsigned helper,
- int argSize,
- emitAttr retSize
-#ifndef LEGACY_BACKEND
- ,
- regNumber callTargetReg /*= REG_NA */
-#endif // !LEGACY_BACKEND
- )
+//------------------------------------------------------------------------
+// genEmitHelperCall: Emit a call to a helper function.
+//
+void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regNumber callTargetReg /*= REG_NA */)
{
- NYI("Helper call");
+ // Can we call the helper function directly?
+
+ void *addr = NULL, **pAddr = NULL;
+
+#if defined(DEBUG) && defined(PROFILING_SUPPORTED)
+ // Don't ask VM if it hasn't requested ELT hooks
+ if (!compiler->compProfilerHookNeeded && compiler->opts.compJitELTHookEnabled &&
+ (helper == CORINFO_HELP_PROF_FCN_ENTER || helper == CORINFO_HELP_PROF_FCN_LEAVE ||
+ helper == CORINFO_HELP_PROF_FCN_TAILCALL))
+ {
+ addr = compiler->compProfilerMethHnd;
+ }
+ else
+#endif
+ {
+ addr = compiler->compGetHelperFtn((CorInfoHelpFunc)helper, (void**)&pAddr);
+ }
+
+ if (!addr || !arm_Valid_Imm_For_BL((ssize_t)addr))
+ {
+ if (callTargetReg == REG_NA)
+ {
+ // If a callTargetReg has not been explicitly provided, we will use REG_DEFAULT_HELPER_CALL_TARGET, but
+ // this is only a valid assumption if the helper call is known to kill REG_DEFAULT_HELPER_CALL_TARGET.
+ callTargetReg = REG_DEFAULT_HELPER_CALL_TARGET;
+ }
+
+ // Load the address into a register and call through a register
+ if (addr)
+ {
+ instGen_Set_Reg_To_Imm(EA_HANDLE_CNS_RELOC, callTargetReg, (ssize_t)addr);
+ }
+ else
+ {
+ getEmitter()->emitIns_R_AI(INS_ldr, EA_PTR_DSP_RELOC, callTargetReg, (ssize_t)pAddr);
+ regTracker.rsTrackRegTrash(callTargetReg);
+ }
+
+ getEmitter()->emitIns_Call(emitter::EC_INDIR_R, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) NULL, // addr
+ argSize, retSize, gcInfo.gcVarPtrSetCur, gcInfo.gcRegGCrefSetCur,
+ gcInfo.gcRegByrefSetCur,
+ BAD_IL_OFFSET, // ilOffset
+ callTargetReg, // ireg
+ REG_NA, 0, 0, // xreg, xmul, disp
+ false, // isJump
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+ else
+ {
+ getEmitter()->emitIns_Call(emitter::EC_FUNC_TOKEN, compiler->eeFindHelper(helper),
+ INDEBUG_LDISASM_COMMA(nullptr) addr, argSize, retSize, gcInfo.gcVarPtrSetCur,
+ gcInfo.gcRegGCrefSetCur, gcInfo.gcRegByrefSetCur, BAD_IL_OFFSET, REG_NA, REG_NA, 0,
+ 0, /* ilOffset, ireg, xreg, xmul, disp */
+ false, /* isJump */
+ emitter::emitNoGChelper(helper),
+ (CorInfoHelpFunc)helper == CORINFO_HELP_PROF_FCN_LEAVE);
+ }
+
+ regTracker.rsTrashRegSet(RBM_CALLEE_TRASH);
+ regTracker.rsTrashRegsForGCInterruptability();
}
#endif // _TARGET_ARM_
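The new genFloatToIntCast relies on the contract spelled out in its comment: only 4- and 8-byte destination types reach the backend, because a conversion to a small integer type is expressed as two stacked casts by the front-end or lowering phase. A minimal C++ sketch of that shape (illustrative only; the function name and types below are assumptions, not JIT code):

    #include <cstdint>

    // float -> signed byte is performed as float -> int32 first, then int32 -> int8,
    // so codegen only ever has to emit a 4-byte (or 8-byte) float-to-int conversion.
    int8_t floatToSByte(float f)
    {
        int32_t wide = static_cast<int32_t>(f); // first-level cast: float -> int32
        return static_cast<int8_t>(wide);       // second-level cast: int32 -> int8
    }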
diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index cc7c5dc524..71c6dd1162 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -1326,7 +1326,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
genDefineTempLabel(gsCheckBlk);
}
-BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
// Generate a call to the finally, like this:
// mov x0,qword ptr [fp + 10H] / sp // Load x0 with PSPSym, or sp if PSPSym is not used
@@ -1387,8 +1387,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
if (!(block->bbFlags & BBF_RETLESS_CALL))
{
assert(block->isBBCallAlwaysPair());
-
- lblk = block;
block = block->bbNext;
}
return block;
@@ -1918,6 +1916,10 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
emitter* emit = getEmitter();
#ifdef DEBUG
+ // Validate that all the operands for the current node are consumed in order.
+ // This is important because LSRA ensures that any necessary copies will be
+ // handled correctly.
+ lastConsumedNode = nullptr;
if (compiler->verbose)
{
unsigned seqNum = treeNode->gtSeqNum; // Useful for setting a conditional break in Visual Studio
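The lastConsumedNode reset added above backs a debug-only ordering check: operands must be consumed in the order they were sequenced, or any copies the register allocator planned could be applied to the wrong values. A hypothetical, self-contained sketch of such a check (the toy Node type, helper name, and use of a sequence number are assumptions, not the JIT's actual implementation):

    #include <cassert>

    struct Node
    {
        unsigned seqNum; // linear-order position assigned before codegen
    };

    static Node* lastConsumedNode = nullptr;

    void noteConsumed(Node* node)
    {
        // Consuming out of sequence would invalidate copies planned by the
        // register allocator, so flag it immediately in checked builds.
        assert((lastConsumedNode == nullptr) || (lastConsumedNode->seqNum < node->seqNum));
        lastConsumedNode = node;
    }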
@@ -2262,7 +2264,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
else
{
assert(!data->isContained());
- genConsumeReg(data);
dataReg = data->gtRegNum;
}
assert(dataReg != REG_NA);
@@ -2314,7 +2315,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
else
{
assert(!data->isContained());
- genConsumeReg(data);
dataReg = data->gtRegNum;
}
assert(dataReg != REG_NA);
@@ -2423,8 +2423,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- assert(!op1->isContainedMemoryOp());
- assert(!op2->isContainedMemoryOp());
+ assert(!op1->isUsedFromMemory());
+ assert(!op2->isUsedFromMemory());
genConsumeOperands(tree);
@@ -3798,8 +3798,8 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTree * src1, *src2;
emitJumpKind jmpKind;
- genConsumeRegs(arrLen);
genConsumeRegs(arrIndex);
+ genConsumeRegs(arrLen);
if (arrIndex->isContainedIntOrIImmed())
{
@@ -3951,14 +3951,14 @@ void CodeGen::genCodeForArrOffset(GenTreeArrOffs* arrOffset)
if (!offsetNode->IsIntegralConst(0))
{
- emitter* emit = getEmitter();
- GenTreePtr arrObj = arrOffset->gtArrObj;
- regNumber arrReg = genConsumeReg(arrObj);
- noway_assert(arrReg != REG_NA);
+ emitter* emit = getEmitter();
regNumber offsetReg = genConsumeReg(offsetNode);
noway_assert(offsetReg != REG_NA);
regNumber indexReg = genConsumeReg(indexNode);
noway_assert(indexReg != REG_NA);
+ GenTreePtr arrObj = arrOffset->gtArrObj;
+ regNumber arrReg = genConsumeReg(arrObj);
+ noway_assert(arrReg != REG_NA);
regMaskTP tmpRegMask = arrOffset->gtRsvdRegs;
regNumber tmpReg = genRegNumFromMask(tmpRegMask);
noway_assert(tmpReg != REG_NA);
@@ -4118,12 +4118,11 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
assert(tree->gtRegNum != REG_NA);
GenTreePtr operand = tree->gtGetOp1();
- genConsumeReg(operand);
+ genConsumeOperands(tree->AsOp());
GenTreePtr shiftBy = tree->gtGetOp2();
if (!shiftBy->IsCnsIntOrI())
{
- genConsumeReg(shiftBy);
getEmitter()->emitIns_R_R_R(ins, size, tree->gtRegNum, operand->gtRegNum, shiftBy->gtRegNum);
}
else
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 240911523f..b1e474b755 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -2475,6 +2475,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
EJ_jle, // GT_LE
EJ_jge, // GT_GE
EJ_jg, // GT_GT
+#ifndef LEGACY_BACKEND
+ EJ_je, // GT_TEST_EQ
+ EJ_jne, // GT_TEST_NE
+#endif
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ
EJ_ne, // GT_NE
@@ -2494,6 +2498,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
EJ_jbe, // GT_LE
EJ_jae, // GT_GE
EJ_ja, // GT_GT
+#ifndef LEGACY_BACKEND
+ EJ_je, // GT_TEST_EQ
+ EJ_jne, // GT_TEST_NE
+#endif
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ
EJ_ne, // GT_NE
@@ -2513,6 +2521,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
EJ_NONE, // GT_LE
EJ_jns, // GT_GE (S == 0)
EJ_NONE, // GT_GT
+#ifndef LEGACY_BACKEND
+ EJ_NONE, // GT_TEST_EQ
+ EJ_NONE, // GT_TEST_NE
+#endif
#elif defined(_TARGET_ARMARCH_)
EJ_eq, // GT_EQ (Z == 1)
EJ_ne, // GT_NE (Z == 0)
@@ -2530,6 +2542,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
assert(genJCCinsSigned[GT_LE - GT_EQ] == EJ_jle);
assert(genJCCinsSigned[GT_GE - GT_EQ] == EJ_jge);
assert(genJCCinsSigned[GT_GT - GT_EQ] == EJ_jg);
+#ifndef LEGACY_BACKEND
+ assert(genJCCinsSigned[GT_TEST_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsSigned[GT_TEST_NE - GT_EQ] == EJ_jne);
+#endif
assert(genJCCinsUnsigned[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsUnsigned[GT_NE - GT_EQ] == EJ_jne);
@@ -2537,6 +2553,10 @@ emitJumpKind CodeGen::genJumpKindForOper(genTreeOps cmp, CompareKind compareKind
assert(genJCCinsUnsigned[GT_LE - GT_EQ] == EJ_jbe);
assert(genJCCinsUnsigned[GT_GE - GT_EQ] == EJ_jae);
assert(genJCCinsUnsigned[GT_GT - GT_EQ] == EJ_ja);
+#ifndef LEGACY_BACKEND
+ assert(genJCCinsUnsigned[GT_TEST_EQ - GT_EQ] == EJ_je);
+ assert(genJCCinsUnsigned[GT_TEST_NE - GT_EQ] == EJ_jne);
+#endif
assert(genJCCinsLogical[GT_EQ - GT_EQ] == EJ_je);
assert(genJCCinsLogical[GT_NE - GT_EQ] == EJ_jne);
@@ -3145,12 +3165,17 @@ void CodeGen::genGenerateCode(void** codePtr, ULONG* nativeSizeOfCode)
/* Check our max stack level. Needed for fgAddCodeRef().
We need to relax the assert as our estimation won't include code-gen
stack changes (which we know don't affect fgAddCodeRef()) */
- noway_assert(getEmitter()->emitMaxStackDepth <=
- (compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
- compiler->compHndBBtabCount + // Return address for locally-called finallys
- genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
- (compiler->compTailCallUsed ? 4 : 0))); // CORINFO_HELP_TAILCALL args
+ {
+ unsigned maxAllowedStackDepth = compiler->fgPtrArgCntMax + // Max number of pointer-sized stack arguments.
+ compiler->compHndBBtabCount + // Return address for locally-called finallys
+ genTypeStSz(TYP_LONG) + // longs/doubles may be transferred via stack, etc
+ (compiler->compTailCallUsed ? 4 : 0); // CORINFO_HELP_TAILCALL args
+#if defined(UNIX_X86_ABI)
+ maxAllowedStackDepth += genTypeStSz(TYP_INT) * 3; // stack align for x86 - allow up to 3 INTs for padding
#endif
+ noway_assert(getEmitter()->emitMaxStackDepth <= maxAllowedStackDepth);
+ }
+#endif // EMIT_TRACK_STACK_DEPTH
*nativeSizeOfCode = codeSize;
compiler->info.compNativeCodeSize = (UNATIVE_OFFSET)codeSize;
@@ -10241,6 +10266,66 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()
// Look in CodeGenArm64.cpp
+#elif defined(_TARGET_X86_)
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet prolog.
+ */
+
+void CodeGen::genFuncletProlog(BasicBlock* block)
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletProlog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingProlog(&compiler->compGeneratingProlog, true);
+
+ compiler->unwindBegProlog();
+
+ // TODO Save callee-saved registers
+
+ // This is the end of the OS-reported prolog for purposes of unwinding
+ compiler->unwindEndProlog();
+}
+
+/*****************************************************************************
+ *
+ * Generates code for an EH funclet epilog.
+ */
+
+void CodeGen::genFuncletEpilog()
+{
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("*************** In genFuncletEpilog()\n");
+ }
+#endif
+
+ ScopedSetVariable<bool> _setGeneratingEpilog(&compiler->compGeneratingEpilog, true);
+
+ // TODO Restore callee-saved registers
+
+ instGen_Return(0);
+}
+
+/*****************************************************************************
+ *
+ * Capture the information used to generate the funclet prologs and epilogs.
+ */
+
+void CodeGen::genCaptureFuncletPrologEpilogInfo()
+{
+ if (!compiler->ehAnyFunclets())
+ {
+ return;
+ }
+}
+
#else // _TARGET_*
/*****************************************************************************
@@ -10583,6 +10668,7 @@ GenTreePtr CodeGen::genMakeConst(const void* cnsAddr, var_types cnsType, GenTree
// funclet frames: this will be FuncletInfo.fiSpDelta.
void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
{
+ genVzeroupperIfNeeded(false);
regMaskTP regMask = compiler->compCalleeFPRegsSavedMask;
// Only callee saved floating point registers should be in regMask
@@ -10621,16 +10707,6 @@ void CodeGen::genPreserveCalleeSavedFltRegs(unsigned lclFrameSize)
offset -= XMM_REGSIZE_BYTES;
}
}
-
-#ifdef FEATURE_AVX_SUPPORT
- // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
- // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
- // using SSE2.
- if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
- {
- instGen(INS_vzeroupper);
- }
-#endif
}
// Save/Restore compCalleeFPRegsPushed with the smallest register number saved at [RSP+offset], working
@@ -10651,6 +10727,7 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
// fast path return
if (regMask == RBM_NONE)
{
+ genVzeroupperIfNeeded();
return;
}
@@ -10682,16 +10759,6 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
assert((offset % 16) == 0);
#endif // _TARGET_AMD64_
-#ifdef FEATURE_AVX_SUPPORT
- // Just before restoring float registers issue a Vzeroupper to zero out upper 128-bits of all YMM regs.
- // This is to avoid penalty if this routine is using AVX-256 and now returning to a routine that is
- // using SSE2.
- if (compiler->getFloatingPointInstructionSet() == InstructionSet_AVX)
- {
- instGen(INS_vzeroupper);
- }
-#endif
-
for (regNumber reg = REG_FLT_CALLEE_SAVED_FIRST; regMask != RBM_NONE; reg = REG_NEXT(reg))
{
regMaskTP regBit = genRegMask(reg);
@@ -10706,7 +10773,41 @@ void CodeGen::genRestoreCalleeSavedFltRegs(unsigned lclFrameSize)
offset -= XMM_REGSIZE_BYTES;
}
}
+ genVzeroupperIfNeeded();
}
+
+// Generate a Vzeroupper instruction as needed to zero out the upper 128 bits of all YMM registers so that
+// AVX/legacy SSE transition penalties can be avoided. This function is used in genPreserveCalleeSavedFltRegs
+// (prolog) and genRestoreCalleeSavedFltRegs (epilog). Issue VZEROUPPER in the prolog if the method contains
+// 128-bit or 256-bit AVX code, to avoid the legacy SSE to AVX transition penalty, which could happen when
+// native code containing legacy SSE calls into JIT AVX code (e.g. reverse pinvoke). Issue VZEROUPPER in the
+// epilog if the method contains 256-bit AVX code, to avoid the AVX to legacy SSE transition penalty.
+//
+// Params
+// check256bitOnly - true to check whether the function contains 256-bit AVX instructions and generate a
+// Vzeroupper instruction; false to check whether the function contains any AVX instructions (128-bit or 256-bit).
+//
+void CodeGen::genVzeroupperIfNeeded(bool check256bitOnly /* = true*/)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ bool emitVzeroUpper = false;
+ if (check256bitOnly)
+ {
+ emitVzeroUpper = getEmitter()->Contains256bitAVX();
+ }
+ else
+ {
+ emitVzeroUpper = getEmitter()->ContainsAVX();
+ }
+
+ if (emitVzeroUpper)
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ instGen(INS_vzeroupper);
+ }
+#endif
+}
+
#endif // defined(_TARGET_XARCH_) && !FEATURE_STACK_FP_X87
//-----------------------------------------------------------------------------------
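genVzeroupperIfNeeded centralizes the vzeroupper placement that the two deleted FEATURE_AVX_SUPPORT blocks used to do inline. Outside the JIT the same transition-penalty rule applies to hand-written code; a small stand-alone sketch (assuming an AVX-capable build, e.g. compiled with -mavx) using the public intrinsic that maps to vzeroupper:

    #include <immintrin.h>
    #include <cstddef>

    // Square n floats with 256-bit AVX, then clear the upper YMM halves before
    // returning so a legacy-SSE caller does not pay the AVX->SSE transition penalty.
    void squareAll(float* dst, const float* src, size_t n)
    {
        size_t i = 0;
        for (; i + 8 <= n; i += 8)
        {
            __m256 v = _mm256_loadu_ps(src + i);
            _mm256_storeu_ps(dst + i, _mm256_mul_ps(v, v));
        }
        for (; i < n; i++)
        {
            dst[i] = src[i] * src[i];
        }
        _mm256_zeroupper(); // emits vzeroupper, the same instruction the epilog path issues
    }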
diff --git a/src/jit/codegenlegacy.cpp b/src/jit/codegenlegacy.cpp
index 667b9d4af8..0530863d81 100644
--- a/src/jit/codegenlegacy.cpp
+++ b/src/jit/codegenlegacy.cpp
@@ -1837,6 +1837,15 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTreePtr arrRef = NULL;
int lenOffset = 0;
+ /* Is the array index a constant value? */
+ GenTreePtr index = bndsChk->gtIndex;
+ if (!index->IsCnsIntOrI())
+ {
+ // No, it's not a constant.
+ genCodeForTree(index, RBM_ALLINT);
+ regSet.rsMarkRegUsed(index);
+ }
+
// If "arrLen" is a ARR_LENGTH operation, get the array whose length that takes in a register.
// Otherwise, if the length is not a constant, get it (the length, not the arr reference) in
// a register.
@@ -1884,14 +1893,8 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
}
}
- /* Is the array index a constant value? */
- GenTreePtr index = bndsChk->gtIndex;
if (!index->IsCnsIntOrI())
{
- // No, it's not a constant.
- genCodeForTree(index, RBM_ALLINT);
- regSet.rsMarkRegUsed(index);
-
// If we need "arrRef" or "arrLen", and evaluating "index" displaced whichever of them we're using
// from its register, get it back in a register.
if (arrRef != NULL)
@@ -1983,6 +1986,11 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
}
// Free the registers that were used.
+ if (!index->IsCnsIntOrI())
+ {
+ regSet.rsMarkRegFree(index->gtRegNum, index);
+ }
+
if (arrRef != NULL)
{
regSet.rsMarkRegFree(arrRef->gtRegNum, arrRef);
@@ -1991,11 +1999,6 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
{
regSet.rsMarkRegFree(arrLen->gtRegNum, arrLen);
}
-
- if (!index->IsCnsIntOrI())
- {
- regSet.rsMarkRegFree(index->gtRegNum, index);
- }
}
/*****************************************************************************
@@ -2590,7 +2593,7 @@ regMaskTP CodeGen::genRestoreAddrMode(GenTreePtr addr, GenTreePtr tree, bool loc
if (tree->gtOp.gtOp1)
regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp1, lockPhase);
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
regMask |= genRestoreAddrMode(addr, tree->gtOp.gtOp2, lockPhase);
}
else if (tree->gtOper == GT_ARR_ELEM)
@@ -3039,7 +3042,7 @@ AGAIN:
noway_assert(kind & GTK_SMPOP);
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
genEvalSideEffects(tree->gtOp.gtOp1);
@@ -9689,7 +9692,7 @@ void CodeGen::genCodeForTreeSmpOp(GenTreePtr tree, regMaskTP destReg, regMaskTP
const genTreeOps oper = tree->OperGet();
const var_types treeType = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
regNumber reg = DUMMY_INIT(REG_CORRUPT);
regMaskTP regs = regSet.rsMaskUsed;
regMaskTP needReg = destReg;
@@ -13394,7 +13397,7 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
int helper;
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
switch (oper)
{
@@ -14538,79 +14541,6 @@ void CodeGen::genCodeForTreeLng(GenTreePtr tree, regMaskTP needReg, regMaskTP av
goto DONE;
-#if LONG_ASG_OPS
-
- case GT_ASG_OR:
- insLo = insHi = INS_OR;
- goto ASG_OPR;
- case GT_ASG_XOR:
- insLo = insHi = INS_XOR;
- goto ASG_OPR;
- case GT_ASG_AND:
- insLo = insHi = INS_AND;
- goto ASG_OPR;
- case GT_ASG_SUB:
- insLo = INS_sub;
- insHi = INS_SUBC;
- goto ASG_OPR;
- case GT_ASG_ADD:
- insLo = INS_add;
- insHi = INS_ADDC;
- goto ASG_OPR;
-
- ASG_OPR:
-
- if (op2->gtOper == GT_CNS_LNG)
- {
- __int64 lval = op2->gtLngCon.gtLconVal;
-
- /* Make the target addressable */
-
- addrReg = genMakeAddressable(op1, needReg, RegSet::FREE_REG);
-
- /* Optimize some special cases */
-
- doLo = doHi = true;
-
- /* Check for "(op1 AND -1)" and "(op1 [X]OR 0)" */
-
- switch (oper)
- {
- case GT_ASG_AND:
- if ((int)(lval) == -1)
- doLo = false;
- if ((int)(lval >> 32) == -1)
- doHi = false;
- break;
-
- case GT_ASG_OR:
- case GT_ASG_XOR:
- if (!(lval & 0x00000000FFFFFFFF))
- doLo = false;
- if (!(lval & 0xFFFFFFFF00000000))
- doHi = false;
- break;
- }
-
- if (doLo)
- inst_TT_IV(insLo, op1, (int)(lval), 0);
- if (doHi)
- inst_TT_IV(insHi, op1, (int)(lval >> 32), 4);
-
- bool isArith = (oper == GT_ASG_ADD || oper == GT_ASG_SUB);
- if (doLo || doHi)
- tree->gtFlags |= GTF_ZSF_SET;
-
- genDoneAddressable(op1, addrReg, RegSet::FREE_REG);
- goto DONE_ASSG_REGS;
- }
-
- /* TODO: allow non-const long assignment operators */
-
- noway_assert(!"non-const long asgop NYI");
-
-#endif // LONG_ASG_OPS
-
case GT_IND:
case GT_NULLCHECK:
{
@@ -20725,27 +20655,26 @@ bool CodeGen::genRegTrashable(regNumber reg, GenTreePtr tree)
*/
GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, // The node to start walking with.
- GenTreePtr relopNode, // The node before the startNode.
+ GenTreePtr relopNode) // The node before the startNode.
// (It should either be NULL or
// a GTF_RELOP_QMARK node.)
- GenTreePtr asgdLclVar)
{
GenTreePtr tree;
VARSET_TP VARSET_INIT(this, defSet_BeforeSplit, fgCurDefSet); // Store the current fgCurDefSet and fgCurUseSet so
VARSET_TP VARSET_INIT(this, useSet_BeforeSplit, fgCurUseSet); // we can restore then before entering the elseTree.
- bool heapUse_BeforeSplit = fgCurHeapUse;
- bool heapDef_BeforeSplit = fgCurHeapDef;
- bool heapHavoc_BeforeSplit = fgCurHeapHavoc;
+ MemoryKindSet memoryUse_BeforeSplit = fgCurMemoryUse;
+ MemoryKindSet memoryDef_BeforeSplit = fgCurMemoryDef;
+ MemoryKindSet memoryHavoc_BeforeSplit = fgCurMemoryHavoc;
VARSET_TP VARSET_INIT_NOCOPY(defSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // These two variables will store
// the USE and DEF sets after
VARSET_TP VARSET_INIT_NOCOPY(useSet_AfterThenTree, VarSetOps::MakeEmpty(this)); // evaluating the thenTree.
- bool heapUse_AfterThenTree = fgCurHeapUse;
- bool heapDef_AfterThenTree = fgCurHeapDef;
- bool heapHavoc_AfterThenTree = fgCurHeapHavoc;
+ MemoryKindSet memoryUse_AfterThenTree = fgCurMemoryUse;
+ MemoryKindSet memoryDef_AfterThenTree = fgCurMemoryDef;
+ MemoryKindSet memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
// relopNode is either NULL or a GTF_RELOP_QMARK node.
assert(!relopNode || (relopNode->OperKind() & GTK_RELOP) && (relopNode->gtFlags & GTF_RELOP_QMARK));
@@ -20772,9 +20701,9 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
VarSetOps::IntersectionD(this, fgCurDefSet, defSet_AfterThenTree);
VarSetOps::UnionD(this, fgCurUseSet, useSet_AfterThenTree);
- fgCurHeapDef = fgCurHeapDef && heapDef_AfterThenTree;
- fgCurHeapHavoc = fgCurHeapHavoc && heapHavoc_AfterThenTree;
- fgCurHeapUse = fgCurHeapUse || heapUse_AfterThenTree;
+ fgCurMemoryDef = fgCurMemoryDef & memoryDef_AfterThenTree;
+ fgCurMemoryHavoc = fgCurMemoryHavoc & memoryHavoc_AfterThenTree;
+ fgCurMemoryUse = fgCurMemoryUse | memoryUse_AfterThenTree;
// Return the GT_QMARK node itself so the caller can continue from there.
// NOTE: the caller will get to the next node by doing the "tree = tree->gtNext"
@@ -20791,16 +20720,16 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
VarSetOps::Assign(this, defSet_AfterThenTree, fgCurDefSet);
VarSetOps::Assign(this, useSet_AfterThenTree, fgCurUseSet);
- heapDef_AfterThenTree = fgCurHeapDef;
- heapHavoc_AfterThenTree = fgCurHeapHavoc;
- heapUse_AfterThenTree = fgCurHeapUse;
+ memoryDef_AfterThenTree = fgCurMemoryDef;
+ memoryHavoc_AfterThenTree = fgCurMemoryHavoc;
+ memoryUse_AfterThenTree = fgCurMemoryUse;
VarSetOps::Assign(this, fgCurDefSet, defSet_BeforeSplit);
VarSetOps::Assign(this, fgCurUseSet, useSet_BeforeSplit);
- fgCurHeapDef = heapDef_BeforeSplit;
- fgCurHeapHavoc = heapHavoc_BeforeSplit;
- fgCurHeapUse = heapUse_BeforeSplit;
+ fgCurMemoryDef = memoryDef_BeforeSplit;
+ fgCurMemoryHavoc = memoryHavoc_BeforeSplit;
+ fgCurMemoryUse = memoryUse_BeforeSplit;
break;
@@ -20810,43 +20739,43 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
case GT_LCL_FLD_ADDR:
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
- fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(tree->AsLclVarCommon());
break;
case GT_CLS_VAR:
- // For Volatile indirection, first mutate the global heap
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
// see comments in ValueNum.cpp (under case GT_CLS_VAR)
// This models Volatile reads as def-then-use of the heap.
// and allows for a CSE of a subsequent non-volatile read
if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
// If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to
// assignment.
// Otherwise, we treat it as a use here.
- if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
{
- fgCurHeapUse = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
case GT_IND:
- // For Volatile indirection, first mutate the global heap
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
// see comments in ValueNum.cpp (under case GT_CLS_VAR)
// This models Volatile reads as def-then-use of the heap.
// and allows for a CSE of a subsequent non-volatile read
if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
// If the GT_IND is the lhs of an assignment, we'll handle it
- // as a heap def, when we get to assignment.
+ // as a heap/byref def, when we get to assignment.
// Otherwise, we treat it as a use here.
if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
{
@@ -20855,16 +20784,13 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
else
{
// Defines a local addr
assert(dummyLclVarTree != nullptr);
- fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
}
}
break;
@@ -20875,25 +20801,23 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
unreached();
break;
- // We'll assume these are use-then-defs of the heap.
+ // We'll assume these are use-then-defs of GcHeap/ByrefExposed.
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
break;
case GT_MEMORYBARRIER:
- // Simliar to any Volatile indirection, we must handle this as a definition of the global heap
- fgCurHeapDef = true;
+ // Similar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
break;
- // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ // For now, all calls read/write GcHeap/ByrefExposed, writes in their entirety. Might tighten this case
+ // later.
case GT_CALL:
{
GenTreeCall* call = tree->AsCall();
@@ -20909,12 +20833,9 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
}
if (modHeap)
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
}
@@ -20946,14 +20867,26 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
default:
- // Determine whether it defines a heap location.
+ // Determine what memory kinds it defines.
if (tree->OperIsAssignment() || tree->OperIsBlkOp())
{
GenTreeLclVarCommon* dummyLclVarTree = NULL;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ if (tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
+ {
+ fgCurMemoryDef |= memoryKindSet(ByrefExposed);
+
+ // We've found a store that modifies ByrefExposed
+ // memory but not GcHeap memory, so track their
+ // states separately.
+ byrefStatesMatchGcHeapStates = false;
+ }
+ }
+ else
{
- // If it doesn't define a local, then it might update the heap.
- fgCurHeapDef = true;
+ // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
}
@@ -20967,7 +20900,7 @@ GenTreePtr Compiler::fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode,
// fgCurDefSet and fgCurUseSet into local variables defSet_BeforeSplit and useSet_BeforeSplit.
// The cached values will be used to restore fgCurDefSet and fgCurUseSet once we see the GT_COLON
// node.
- tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree, asgdLclVar);
+ tree = fgLegacyPerStatementLocalVarLiveness(tree->gtNext, tree);
// We must have been returned here after seeing a GT_QMARK node.
noway_assert(tree->gtOper == GT_QMARK);
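The liveness rewrite above replaces the three per-heap booleans with MemoryKindSet bit sets, so the merge at a QMARK/COLON join becomes plain bitwise arithmetic. A sketch of that merge rule with hypothetical bit values (the real kinds, GcHeap and ByrefExposed, are defined elsewhere in the compiler):

    // Each memory kind occupies one bit; the values below are illustrative only.
    typedef unsigned MemoryKindSet;
    const MemoryKindSet GcHeapBit       = 0x1;
    const MemoryKindSet ByrefExposedBit = 0x2;

    // A kind is defined after the join only if both arms define it (bitwise AND),
    // mirroring "fgCurMemoryDef & memoryDef_AfterThenTree" above.
    MemoryKindSet mergeDefs(MemoryKindSet thenDefs, MemoryKindSet elseDefs)
    {
        return thenDefs & elseDefs;
    }

    // A kind is used after the join if either arm uses it (bitwise OR),
    // mirroring "fgCurMemoryUse | memoryUse_AfterThenTree" above.
    MemoryKindSet mergeUses(MemoryKindSet thenUses, MemoryKindSet elseUses)
    {
        return thenUses | elseUses;
    }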
diff --git a/src/jit/codegenlinear.cpp b/src/jit/codegenlinear.cpp
index 9713288e08..329c4a755f 100644
--- a/src/jit/codegenlinear.cpp
+++ b/src/jit/codegenlinear.cpp
@@ -133,9 +133,8 @@ void CodeGen::genCodeForBBlist()
*/
BasicBlock* block;
- BasicBlock* lblk; /* previous block */
- for (lblk = nullptr, block = compiler->fgFirstBB; block != nullptr; lblk = block, block = block->bbNext)
+ for (block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{
#ifdef DEBUG
if (compiler->verbose)
@@ -284,7 +283,7 @@ void CodeGen::genCodeForBBlist()
}
#endif
// We should never have a block that falls through into the Cold section
- noway_assert(!lblk->bbFallsThrough());
+ noway_assert(!block->bbPrev->bbFallsThrough());
// We require the block that starts the Cold section to have a label
noway_assert(block->bbEmitCookie);
@@ -602,7 +601,7 @@ void CodeGen::genCodeForBBlist()
break;
case BBJ_CALLFINALLY:
- block = genCallFinally(block, lblk);
+ block = genCallFinally(block);
break;
#if FEATURE_EH_FUNCLETS
@@ -906,6 +905,13 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree)
// Load local variable from its home location.
inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
+#elif defined(_TARGET_ARM_)
+ var_types targetType = unspillTree->gtType;
+ instruction ins = ins_Load(targetType, compiler->isSIMDTypeLocalAligned(lcl->gtLclNum));
+ emitAttr attr = emitTypeSize(targetType);
+
+ // Load local variable from its home location.
+ inst_RV_TT(ins, dstReg, unspillTree, 0, attr);
#else
NYI("Unspilling not implemented for this target architecture.");
#endif
@@ -1203,22 +1209,16 @@ void CodeGen::genConsumeRegs(GenTree* tree)
}
#endif // !defined(_TARGET_64BIT_)
- if (tree->isContained())
+ if (tree->isUsedFromSpillTemp())
{
- if (tree->isContainedSpillTemp())
- {
- // spill temps are un-tracked and hence no need to update life
- }
- else if (tree->isIndir())
+ // spill temps are un-tracked and hence no need to update life
+ }
+ else if (tree->isContained())
+ {
+ if (tree->isIndir())
{
genConsumeAddress(tree->AsIndir()->Addr());
}
- else if (tree->OperGet() == GT_AND)
- {
- // This is the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
- // Now we need to consume the operands of the GT_AND node.
- genConsumeOperands(tree->AsOp());
- }
#ifdef _TARGET_XARCH_
else if (tree->OperGet() == GT_LCL_VAR)
{
diff --git a/src/jit/codegenlinear.h b/src/jit/codegenlinear.h
index 406ab779f1..c8a5af657a 100644
--- a/src/jit/codegenlinear.h
+++ b/src/jit/codegenlinear.h
@@ -93,10 +93,11 @@ void genSIMDCheck(GenTree* treeNode);
// their size rounded to TARGET_POINTER_SIZE (which is 8 bytes on 64-bit targets) and hence
// Vector3 locals could be treated as TYP_SIMD16 while reading/writing.
void genStoreIndTypeSIMD12(GenTree* treeNode);
-void genStoreLclFldTypeSIMD12(GenTree* treeNode);
void genLoadIndTypeSIMD12(GenTree* treeNode);
+void genStoreLclTypeSIMD12(GenTree* treeNode);
void genLoadLclTypeSIMD12(GenTree* treeNode);
#ifdef _TARGET_X86_
+void genStoreSIMD12ToStack(regNumber operandReg, regNumber tmpReg);
void genPutArgStkSIMD12(GenTree* treeNode);
#endif // _TARGET_X86_
#endif // FEATURE_SIMD
@@ -217,7 +218,7 @@ void genCallInstruction(GenTreePtr call);
void genJmpMethod(GenTreePtr jmp);
-BasicBlock* genCallFinally(BasicBlock* block, BasicBlock* lblk);
+BasicBlock* genCallFinally(BasicBlock* block);
#if FEATURE_EH_FUNCLETS
void genEHCatchRet(BasicBlock* block);
diff --git a/src/jit/codegenxarch.cpp b/src/jit/codegenxarch.cpp
index 8e0af48799..e893da6035 100644
--- a/src/jit/codegenxarch.cpp
+++ b/src/jit/codegenxarch.cpp
@@ -226,7 +226,7 @@ void CodeGen::genEmitGSCookieCheck(bool pushReg)
genPopRegs(pushedRegs, byrefPushedRegs, norefPushedRegs);
}
-BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
+BasicBlock* CodeGen::genCallFinally(BasicBlock* block)
{
#if FEATURE_EH_FUNCLETS
// Generate a call to the finally, like this:
@@ -263,10 +263,14 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
}
else
{
+// TODO-Linux-x86: Do we need to handle the GC information for this NOP or JMP specially, as is done for other
+// architectures?
+#ifndef JIT32_GCENCODER
// Because of the way the flowgraph is connected, the liveness info for this one instruction
// after the call is not (can not be) correct in cases where a variable has a last use in the
// handler. So turn off GC reporting for this single instruction.
getEmitter()->emitDisableGC();
+#endif // JIT32_GCENCODER
// Now go to where the finally funclet needs to return to.
if (block->bbNext->bbJumpDest == block->bbNext->bbNext)
@@ -282,7 +286,9 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
inst_JMP(EJ_jmp, block->bbNext->bbJumpDest);
}
+#ifndef JIT32_GCENCODER
getEmitter()->emitEnableGC();
+#endif // JIT32_GCENCODER
}
#else // !FEATURE_EH_FUNCLETS
@@ -348,8 +354,6 @@ BasicBlock* CodeGen::genCallFinally(BasicBlock* block, BasicBlock* lblk)
if (!(block->bbFlags & BBF_RETLESS_CALL))
{
assert(block->isBBCallAlwaysPair());
-
- lblk = block;
block = block->bbNext;
}
return block;
@@ -515,13 +519,13 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
GenTree* regOp = op1;
GenTree* rmOp = op2;
- // Set rmOp to the contained memory operand (if any)
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == REG_RAX)))
+ // Set rmOp to the memory operand (if any)
+ if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == REG_RAX)))
{
regOp = op2;
rmOp = op1;
}
- assert(!regOp->isContained());
+ assert(regOp->isUsedFromReg());
// Setup targetReg when neither of the source operands was a matching register
if (regOp->gtRegNum != REG_RAX)
@@ -569,12 +573,12 @@ void CodeGen::genCodeForLongUMod(GenTreeOp* node)
GenTree* const dividendLo = dividend->gtOp1;
GenTree* const dividendHi = dividend->gtOp2;
- assert(!dividendLo->isContained());
- assert(!dividendHi->isContained());
+ assert(dividendLo->isUsedFromReg());
+ assert(dividendHi->isUsedFromReg());
GenTree* const divisor = node->gtOp2;
assert(divisor->gtSkipReloadOrCopy()->OperGet() == GT_CNS_INT);
- assert(!divisor->gtSkipReloadOrCopy()->isContained());
+ assert(divisor->gtSkipReloadOrCopy()->isUsedFromReg());
assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal >= 2);
assert(divisor->gtSkipReloadOrCopy()->AsIntCon()->gtIconVal <= 0x3fffffff);
@@ -656,16 +660,16 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
var_types targetType = treeNode->TypeGet();
emitter* emit = getEmitter();
- // dividend is not contained.
- assert(!dividend->isContained());
+ // dividend is in a register.
+ assert(dividend->isUsedFromReg());
genConsumeOperands(treeNode->AsOp());
if (varTypeIsFloating(targetType))
{
- // divisor is not contained or if contained is a memory op.
+ // Check that divisor is a valid operand.
// Note that a reg optional operand is treated as a memory op
// if no register is allocated to it.
- assert(!divisor->isContained() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
+ assert(divisor->isUsedFromReg() || divisor->isMemoryOp() || divisor->IsCnsFltOrDbl() ||
divisor->IsRegOptional());
// Floating point div/rem operation
@@ -675,7 +679,7 @@ void CodeGen::genCodeForDivMod(GenTreeOp* treeNode)
{
emit->emitInsBinary(genGetInsForOper(treeNode->gtOper, targetType), size, treeNode, divisor);
}
- else if (!divisor->isContained() && divisor->gtRegNum == targetReg)
+ else if (divisor->isUsedFromReg() && divisor->gtRegNum == targetReg)
{
// It is not possible to generate 2-operand divss or divsd where reg2 = reg1 / reg2
// because divss/divsd reg1, reg2 will over-write reg1. Therefore, in case of AMD64
@@ -773,8 +777,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
GenTreePtr op1 = treeNode->gtGetOp1();
GenTreePtr op2 = treeNode->gtGetOp2();
- // Commutative operations can mark op1 as contained to generate "op reg, memop/immed"
- if (op1->isContained())
+ // Commutative operations can mark op1 as contained or reg-optional to generate "op reg, memop/immed"
+ if (!op1->isUsedFromReg())
{
assert(treeNode->OperIsCommutative());
assert(op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl() || op1->IsIntCnsFitsInI32() || op1->IsRegOptional());
@@ -788,8 +792,8 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
// The arithmetic node must be sitting in a register (since it's not contained)
noway_assert(targetReg != REG_NA);
- regNumber op1reg = op1->isContained() ? REG_NA : op1->gtRegNum;
- regNumber op2reg = op2->isContained() ? REG_NA : op2->gtRegNum;
+ regNumber op1reg = op1->isUsedFromReg() ? op1->gtRegNum : REG_NA;
+ regNumber op2reg = op2->isUsedFromReg() ? op2->gtRegNum : REG_NA;
GenTreePtr dst;
GenTreePtr src;
@@ -814,7 +818,7 @@ void CodeGen::genCodeForBinary(GenTree* treeNode)
}
// now we know there are 3 different operands so attempt to use LEA
else if (oper == GT_ADD && !varTypeIsFloating(treeNode) && !treeNode->gtOverflowEx() // LEA does not set flags
- && (op2->isContainedIntOrIImmed() || !op2->isContained()) && !treeNode->gtSetFlags())
+ && (op2->isContainedIntOrIImmed() || op2->isUsedFromReg()) && !treeNode->gtSetFlags())
{
if (op2->isContainedIntOrIImmed())
{
@@ -936,7 +940,7 @@ void CodeGen::genStructReturn(GenTreePtr treeNode)
{
// Right now the only enregistrable structs supported are SIMD vector types.
assert(varTypeIsSIMD(op1));
- assert(!op1->isContained());
+ assert(op1->isUsedFromReg());
// This is a case of operand is in a single reg and needs to be
// returned in multiple ABI return registers.
@@ -974,7 +978,7 @@ void CodeGen::genStructReturn(GenTreePtr treeNode)
}
else
{
- assert(op1->isContained());
+ assert(op1->isUsedFromMemory());
// Copy var on stack into ABI return registers
int offset = 0;
@@ -1328,7 +1332,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
else
{
GenTreePtr operand = treeNode->gtGetOp1();
- assert(!operand->isContained());
+ assert(operand->isUsedFromReg());
regNumber operandReg = genConsumeReg(operand);
if (operandReg != targetReg)
@@ -1374,7 +1378,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_RSH_LO:
// TODO-X86-CQ: This only handles the case where the operand being shifted is in a register. We don't
// need sourceHi to be always in reg in case of GT_LSH_HI (because it could be moved from memory to
- // targetReg if sourceHi is a contained mem-op). Similarly for GT_RSH_LO, sourceLo could be marked as
+ // targetReg if sourceHi is a memory operand). Similarly for GT_RSH_LO, sourceLo could be marked as
// contained memory-op. Even if not a memory-op, we could mark it as reg-optional.
genCodeForShiftLong(treeNode);
break;
@@ -1423,7 +1427,6 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
if (!treeNode->InReg() && !(treeNode->gtFlags & GTF_SPILLED))
{
assert(!isRegCandidate);
-
#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
// Loading of TYP_SIMD12 (i.e. Vector3) variable
if (treeNode->TypeGet() == TYP_SIMD12)
@@ -1486,10 +1489,11 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// storing of TYP_SIMD12 (i.e. Vector3) field
if (treeNode->TypeGet() == TYP_SIMD12)
{
- genStoreLclFldTypeSIMD12(treeNode);
+ genStoreLclTypeSIMD12(treeNode);
break;
}
-#endif
+#endif // FEATURE_SIMD
+
GenTreePtr op1 = treeNode->gtGetOp1();
genConsumeRegs(op1);
emit->emitInsBinary(ins_Store(targetType), emitTypeSize(treeNode), treeNode, op1);
@@ -1526,6 +1530,13 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#endif // !defined(_TARGET_64BIT_)
#ifdef FEATURE_SIMD
+ // storing of TYP_SIMD12 (i.e. Vector3) field
+ if (treeNode->TypeGet() == TYP_SIMD12)
+ {
+ genStoreLclTypeSIMD12(treeNode);
+ break;
+ }
+
if (varTypeIsSIMD(targetType) && (targetReg != REG_NA) && op1->IsCnsIntOrI())
{
// This is only possible for a zero-init.
@@ -1547,25 +1558,24 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
}
else
{
- bool containedOp1 = op1->isContained();
// Look for the case where we have a constant zero which we've marked for reuse,
// but which isn't actually in the register we want. In that case, it's better to create
// zero in the target register, because an xor is smaller than a copy. Note that we could
// potentially handle this in the register allocator, but we can't always catch it there
// because the target may not have a register allocated for it yet.
- if (!containedOp1 && (op1->gtRegNum != treeNode->gtRegNum) &&
+ if (op1->isUsedFromReg() && (op1->gtRegNum != treeNode->gtRegNum) &&
(op1->IsIntegralConst(0) || op1->IsFPZero()))
{
op1->gtRegNum = REG_NA;
op1->ResetReuseRegVal();
- containedOp1 = true;
}
- if (containedOp1)
+ if (!op1->isUsedFromReg())
{
- // Currently, we assume that the contained source of a GT_STORE_LCL_VAR writing to a register
- // must be a constant. However, in the future we might want to support a contained memory op.
- // This is a bit tricky because we have to decide it's contained before register allocation,
+ // Currently, we assume that the non-reg source of a GT_STORE_LCL_VAR writing to a register
+ // must be a constant. However, in the future we might want to support an operand used from
+ // memory. This is a bit tricky because we have to decide it can be used from memory before
+ // register allocation,
// and this would be a case where, once that's done, we need to mark that node as always
// requiring a register - which we always assume now anyway, but once we "optimize" that
// we'll have to take cases like this into account.
@@ -1682,7 +1692,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
// CQ: When possible use LEA for mul by imm 3, 5 or 9
ssize_t imm = immOp->AsIntConCommon()->IconValue();
- if (!requiresOverflowCheck && !rmOp->isContained() && ((imm == 3) || (imm == 5) || (imm == 9)))
+ if (!requiresOverflowCheck && rmOp->isUsedFromReg() && ((imm == 3) || (imm == 5) || (imm == 9)))
{
// We will use the LEA instruction to perform this multiply
// Note that an LEA with base=x, index=x and scale=(imm-1) computes x*imm when imm=3,5 or 9.
@@ -1712,15 +1722,15 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
ins = genGetInsForOper(GT_MUL, targetType);
}
- // Set rmOp to the contain memory operand (if any)
+ // Set rmOp to the memory operand (if any)
// or set regOp to the op2 when it has the matching target register for our multiply op
//
- if (op1->isContained() || (!op2->isContained() && (op2->gtRegNum == mulTargetReg)))
+ if (op1->isUsedFromMemory() || (op2->isUsedFromReg() && (op2->gtRegNum == mulTargetReg)))
{
regOp = op2;
rmOp = op1;
}
- assert(!regOp->isContained());
+ assert(regOp->isUsedFromReg());
// Setup targetReg when neither of the source operands was a matching register
if (regOp->gtRegNum != mulTargetReg)
@@ -1781,6 +1791,8 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_LE:
case GT_GE:
case GT_GT:
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
{
// TODO-XArch-CQ: Check if we can use the currently set flags.
// TODO-XArch-CQ: Check for the case where we can simply transfer the carry bit to a register
@@ -2089,7 +2101,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
case GT_NULLCHECK:
{
- assert(!treeNode->gtOp.gtOp1->isContained());
+ assert(treeNode->gtOp.gtOp1->isUsedFromReg());
regNumber reg = genConsumeReg(treeNode->gtOp.gtOp1);
emit->emitIns_AR_R(INS_cmp, EA_4BYTE, reg, reg, 0);
}
@@ -2180,7 +2192,7 @@ void CodeGen::genCodeForTreeNode(GenTreePtr treeNode)
#if !defined(_TARGET_64BIT_)
case GT_LONG:
- assert(!treeNode->isContained());
+ assert(treeNode->isUsedFromReg());
genConsumeRegs(treeNode);
break;
#endif
@@ -2631,16 +2643,14 @@ void CodeGen::genLclHeap(GenTreePtr tree)
// Loop:
genDefineTempLabel(loop);
-#if defined(_TARGET_AMD64_)
- // Push two 8-byte zeros. This matches the 16-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2));
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
- inst_IV(INS_push_hide, 0); // --- push 8-byte 0
-#elif defined(_TARGET_X86_)
- // Push a single 4-byte zero. This matches the 4-byte STACK_ALIGN value.
- static_assert_no_msg(STACK_ALIGN == REGSIZE_BYTES);
- inst_IV(INS_push_hide, 0); // --- push 4-byte 0
-#endif // _TARGET_X86_
+ static_assert_no_msg((STACK_ALIGN % REGSIZE_BYTES) == 0);
+ unsigned const count = (STACK_ALIGN / REGSIZE_BYTES);
+
+ for (unsigned i = 0; i < count; i++)
+ {
+ inst_IV(INS_push_hide, 0); // --- push REGSIZE_BYTES bytes of 0
+ }
+ // Note that the stack must always be aligned to STACK_ALIGN bytes
// Decrement the loop counter and loop if not done.
inst_RV(INS_dec, regCnt, TYP_I_IMPL);
@@ -2841,8 +2851,8 @@ void CodeGen::genCodeForInitBlkRepStos(GenTreeBlk* initBlkNode)
}
#ifdef DEBUG
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg());
#ifdef _TARGET_AMD64_
assert(size != 0);
#endif
@@ -2878,8 +2888,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* initBlkNode)
initVal = initVal->gtGetOp1();
}
- assert(!dstAddr->isContained());
- assert(!initVal->isContained() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg() || (initVal->IsIntegralConst(0) && ((size & 0xf) == 0)));
assert(size != 0);
assert(size <= INITBLK_UNROLL_LIMIT);
assert(initVal->gtSkipReloadOrCopy()->IsCnsIntOrI());
@@ -2979,8 +2989,8 @@ void CodeGen::genCodeForInitBlk(GenTreeBlk* initBlkNode)
initVal = initVal->gtGetOp1();
}
- assert(!dstAddr->isContained());
- assert(!initVal->isContained());
+ assert(dstAddr->isUsedFromReg());
+ assert(initVal->isUsedFromReg());
if (blockSize != 0)
{
@@ -3064,7 +3074,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
- if (!srcAddr->isContained())
+ if (srcAddr->isUsedFromReg())
{
genConsumeReg(srcAddr);
}
@@ -3086,7 +3096,7 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* cpBlkNode)
srcAddr = source;
}
- if (!dstAddr->isContained())
+ if (dstAddr->isUsedFromReg())
{
genConsumeReg(dstAddr);
}
@@ -3171,7 +3181,7 @@ void CodeGen::genCodeForCpBlkRepMovs(GenTreeBlk* cpBlkNode)
GenTreePtr srcAddr = nullptr;
#ifdef DEBUG
- assert(!dstAddr->isContained());
+ assert(dstAddr->isUsedFromReg());
assert(source->isContained());
#ifdef _TARGET_X86_
@@ -3352,7 +3362,7 @@ void CodeGen::genStructPutArgUnroll(GenTreePutArgStk* putArgNode)
assert(src->gtOper == GT_OBJ);
- if (!src->gtOp.gtOp1->isContained())
+ if (src->gtOp.gtOp1->isUsedFromReg())
{
genConsumeReg(src->gtOp.gtOp1);
}
@@ -3544,7 +3554,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
+ assert(srcAddr->isUsedFromReg());
}
else
{
@@ -3557,7 +3567,7 @@ void CodeGen::genCodeForCpObj(GenTreeObj* cpObjNode)
#ifdef DEBUG
bool isRepMovspUsed = false;
- assert(!dstAddr->isContained());
+ assert(dstAddr->isUsedFromReg());
// If the GenTree node has data about GC pointers, this means we're dealing
// with CpObj, so this requires special logic.
@@ -3720,7 +3730,7 @@ void CodeGen::genCodeForCpBlk(GenTreeBlk* cpBlkNode)
if (source->gtOper == GT_IND)
{
srcAddr = source->gtGetOp1();
- assert(!srcAddr->isContained());
+ assert(srcAddr->isUsedFromReg());
}
else
{
@@ -3863,16 +3873,16 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
GenTreeBoundsChk* bndsChk = oper->AsBoundsChk();
- GenTreePtr arrLen = bndsChk->gtArrLen;
GenTreePtr arrIndex = bndsChk->gtIndex;
+ GenTreePtr arrLen = bndsChk->gtArrLen;
GenTreePtr arrRef = nullptr;
int lenOffset = 0;
GenTree * src1, *src2;
emitJumpKind jmpKind;
- genConsumeRegs(arrLen);
genConsumeRegs(arrIndex);
+ genConsumeRegs(arrLen);
if (arrIndex->isContainedIntOrIImmed())
{
@@ -3899,7 +3909,7 @@ void CodeGen::genRangeCheck(GenTreePtr oper)
// cmp reg, [mem] (if arrLen is a memory op)
//
// That is only one of arrIndex or arrLen can be a memory op.
- assert(!arrIndex->isContainedMemoryOp() || !arrLen->isContainedMemoryOp());
+ assert(!arrIndex->isUsedFromMemory() || !arrLen->isUsedFromMemory());
src1 = arrIndex;
src2 = arrLen;
@@ -4211,7 +4221,7 @@ void CodeGen::genCodeForShift(GenTreePtr tree)
{
// Only the non-RMW case here.
assert(tree->OperIsShiftOrRotate());
- assert(!tree->gtOp.gtOp1->isContained());
+ assert(tree->gtOp.gtOp1->isUsedFromReg());
assert(tree->gtRegNum != REG_NA);
genConsumeOperands(tree->AsOp());
@@ -4277,8 +4287,8 @@ void CodeGen::genCodeForShiftLong(GenTreePtr tree)
GenTree* operand = tree->gtOp.gtOp1;
assert(operand->OperGet() == GT_LONG);
- assert(!operand->gtOp.gtOp1->isContained());
- assert(!operand->gtOp.gtOp2->isContained());
+ assert(operand->gtOp.gtOp1->isUsedFromReg());
+ assert(operand->gtOp.gtOp2->isUsedFromReg());
GenTree* operandLo = operand->gtGetOp1();
GenTree* operandHi = operand->gtGetOp2();
@@ -4334,7 +4344,7 @@ void CodeGen::genCodeForShiftRMW(GenTreeStoreInd* storeInd)
assert(data->OperIsShiftOrRotate());
// This function only handles the RMW case.
- assert(data->gtOp.gtOp1->isContained());
+ assert(data->gtOp.gtOp1->isUsedFromMemory());
assert(data->gtOp.gtOp1->isIndir());
assert(Lowering::IndirsAreEquivalent(data->gtOp.gtOp1, storeInd));
assert(data->gtRegNum == REG_NA);
@@ -4580,7 +4590,7 @@ void CodeGen::genStoreInd(GenTreePtr node)
assert(storeInd->IsRMWDstOp1());
rmwSrc = data->gtGetOp1();
rmwDst = data->gtGetOp1();
- assert(rmwSrc->isContained());
+ assert(rmwSrc->isUsedFromMemory());
}
assert(rmwSrc != nullptr);
@@ -4616,8 +4626,7 @@ void CodeGen::genStoreInd(GenTreePtr node)
assert(rmwSrc == data->gtGetOp2());
genCodeForShiftRMW(storeInd);
}
- else if (!compiler->opts.compDbgCode && data->OperGet() == GT_ADD &&
- (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
+ else if (data->OperGet() == GT_ADD && (rmwSrc->IsIntegralConst(1) || rmwSrc->IsIntegralConst(-1)))
{
// Generate "inc/dec [mem]" instead of "add/sub [mem], 1".
//
@@ -4858,11 +4867,6 @@ void CodeGen::genCallInstruction(GenTreePtr node)
if (arg->OperGet() != GT_ARGPLACE && !(arg->gtFlags & GTF_LATE_ARG))
{
#if defined(_TARGET_X86_)
- assert((arg->OperGet() == GT_PUTARG_STK) || (arg->OperGet() == GT_LONG));
- if (arg->OperGet() == GT_LONG)
- {
- assert((arg->gtGetOp1()->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp2()->OperGet() == GT_PUTARG_STK));
- }
if ((arg->OperGet() == GT_PUTARG_STK) && (arg->gtGetOp1()->OperGet() == GT_FIELD_LIST))
{
fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
@@ -4886,9 +4890,9 @@ void CodeGen::genCallInstruction(GenTreePtr node)
stackArgBytes += argBytes;
}
else
- {
#endif // FEATURE_PUT_STRUCT_ARG_STK
+ {
stackArgBytes += genTypeSize(genActualType(arg->TypeGet()));
}
}
@@ -5001,6 +5005,20 @@ void CodeGen::genCallInstruction(GenTreePtr node)
#endif // defined(_TARGET_X86_)
+#ifdef FEATURE_AVX_SUPPORT
+ // When this is a PInvoke call to a user function (CT_USER_FUNC), we issue VZEROUPPER here
+ // if the method contains 256-bit AVX instructions; this avoids the AVX-256 to legacy SSE
+ // transition penalty, on the assumption that the native callee contains legacy SSE instructions.
+ // To limit the code size impact, we only issue VZEROUPPER before the PInvoke call, not after it,
+ // because the legacy SSE to AVX transition penalty only arises after a preceding 256-bit AVX to
+ // legacy SSE transition.
+ if (call->IsPInvoke() && (call->gtCallType == CT_USER_FUNC) && getEmitter()->Contains256bitAVX())
+ {
+ assert(compiler->getSIMDInstructionSet() == InstructionSet_AVX);
+ instGen(INS_vzeroupper);
+ }
+#endif
+
if (target != nullptr)
{
#ifdef _TARGET_X86_
@@ -5020,7 +5038,7 @@ void CodeGen::genCallInstruction(GenTreePtr node)
assert(target->OperGet() == GT_IND);
GenTree* addr = target->AsIndir()->Addr();
- assert(!addr->isContained());
+ assert(addr->isUsedFromReg());
genConsumeReg(addr);
genCopyRegIfNeeded(addr, REG_VIRTUAL_STUB_TARGET);
@@ -5113,6 +5131,15 @@ void CodeGen::genCallInstruction(GenTreePtr node)
retSize MULTIREG_HAS_SECOND_GC_RET_ONLY_ARG(secondRetSize), ilOffset);
}
+#if defined(UNIX_X86_ABI)
+ // Put back the stack pointer if there was any padding for stack alignment
+ unsigned padStackAlign = call->fgArgInfo->GetPadStackAlign();
+ if (padStackAlign != 0)
+ {
+ inst_RV_IV(INS_add, REG_SPBASE, padStackAlign * TARGET_POINTER_SIZE, EA_PTRSIZE);
+ }
+#endif // UNIX_X86_ABI
+
// if it was a pinvoke we may have needed to get the address of a label
if (genPendingCallLabel)
{
@@ -6064,7 +6091,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
GenTreePtr op2 = tree->gtOp2;
var_types op1Type = op1->TypeGet();
var_types op2Type = op2->TypeGet();
- regNumber targetReg = treeNode->gtRegNum;
+ regNumber targetReg = tree->gtRegNum;
// Case of op1 == 0 or op1 != 0:
// Optimize generation of 'test' instruction if op1 sets flags.
@@ -6081,7 +6108,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
assert(realOp1->gtSetZSFlags());
// Must be (in)equality against zero.
- assert(tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE);
+ assert(tree->OperIs(GT_EQ, GT_NE));
assert(op2->IsIntegralConst(0));
assert(op2->isContained());
@@ -6105,7 +6132,7 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
// If we have GT_JTRUE(GT_EQ/NE(GT_SIMD((in)Equality, v1, v2), true/false)),
// then we don't need to generate code for GT_EQ/GT_NE, since SIMD (in)Equality intrinsic
// would set or clear Zero flag.
- if ((targetReg == REG_NA) && (tree->OperGet() == GT_EQ || tree->OperGet() == GT_NE))
+ if ((targetReg == REG_NA) && tree->OperIs(GT_EQ, GT_NE))
{
// Is it a SIMD (in)Equality that doesn't need to materialize result into a register?
if ((op1->gtRegNum == REG_NA) && op1->IsSIMDEqualityOrInequality())
@@ -6124,128 +6151,67 @@ void CodeGen::genCompareInt(GenTreePtr treeNode)
genConsumeOperands(tree);
- instruction ins;
- emitAttr cmpAttr;
-
// TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm.
// https://github.com/dotnet/coreclr/issues/7270
assert(!op1->isContainedIntOrIImmed()); // We no longer support swapping op1 and op2 to generate cmp reg, imm
assert(!varTypeIsFloating(op2Type));
-#ifdef _TARGET_X86_
- assert(!varTypeIsLong(op1Type) && !varTypeIsLong(op2Type));
-#endif // _TARGET_X86_
-
- // By default we use an int32 sized cmp instruction
- //
- ins = INS_cmp;
- var_types cmpType = TYP_INT;
-
- // In the if/then/else statement below we may change the
- // 'cmpType' and/or 'ins' to generate a smaller instruction
+ instruction ins;
- // Are we comparing two values that are the same size?
- //
- if (genTypeSize(op1Type) == genTypeSize(op2Type))
+ if (tree->OperIs(GT_TEST_EQ, GT_TEST_NE))
{
- if (op1Type == op2Type)
- {
- // If both types are exactly the same we can use that type
- cmpType = op1Type;
- }
- else if (genTypeSize(op1Type) == 8)
- {
- // If we have two different int64 types we need to use a long compare
- cmpType = TYP_LONG;
- }
-
- cmpAttr = emitTypeSize(cmpType);
+ ins = INS_test;
}
- else // Here we know that (op1Type != op2Type)
+ else if (op1->isUsedFromReg() && op2->IsIntegralConst(0))
{
- // Do we have a short compare against a constant in op2?
- //
- // We checked for this case in TreeNodeInfoInitCmp() and if we can perform a small
- // compare immediate we labeled this compare with a GTF_RELOP_SMALL
- // and for unsigned small non-equality compares the GTF_UNSIGNED flag.
- //
- if (op2->isContainedIntOrIImmed() && ((tree->gtFlags & GTF_RELOP_SMALL) != 0))
- {
- assert(varTypeIsSmall(op1Type));
- cmpType = op1Type;
- }
-#ifdef _TARGET_AMD64_
- else // compare two different sized operands
- {
- // For this case we don't want any memory operands, only registers or immediates
- //
- assert(!op1->isContainedMemoryOp());
- assert(!op2->isContainedMemoryOp());
+ // We're comparing a register to 0 so we can generate "test reg1, reg1"
+ // instead of the longer "cmp reg1, 0"
+ ins = INS_test;
+ op2 = op1;
+ }
+ else
+ {
+ ins = INS_cmp;
+ }
- // Check for the case where one operand is an int64 type
- // Lower should have placed 32-bit operand in a register
- // for signed comparisons we will sign extend the 32-bit value in place.
- //
- bool op1Is64Bit = (genTypeSize(op1Type) == 8);
- bool op2Is64Bit = (genTypeSize(op2Type) == 8);
- if (op1Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op2Is64Bit)
- {
- assert(op2->gtRegNum != REG_NA);
- inst_RV_RV(INS_movsxd, op2->gtRegNum, op2->gtRegNum, op2Type);
- }
- }
- else if (op2Is64Bit)
- {
- cmpType = TYP_LONG;
- if (!(tree->gtFlags & GTF_UNSIGNED) && !op1Is64Bit)
- {
- assert(op1->gtRegNum != REG_NA);
- }
- }
- }
-#endif // _TARGET_AMD64_
+ var_types type;
- cmpAttr = emitTypeSize(cmpType);
+ if (op1Type == op2Type)
+ {
+ type = op1Type;
}
-
- // See if we can generate a "test" instruction instead of a "cmp".
- // For this to generate the correct conditional branch we must have
- // a compare against zero.
- //
- if (op2->IsIntegralConst(0))
+ else if (genTypeSize(op1Type) == genTypeSize(op2Type))
{
- if (op1->isContained())
- {
- // op1 can be a contained memory op
- // or the special contained GT_AND that we created in Lowering::TreeNodeInfoInitCmp()
- //
- if ((op1->OperGet() == GT_AND) && op1->gtGetOp2()->isContainedIntOrIImmed() &&
- ((tree->OperGet() == GT_EQ) || (tree->OperGet() == GT_NE)))
- {
- ins = INS_test; // we will generate "test andOp1, andOp2CnsVal"
- op2 = op1->gtOp.gtOp2; // must assign op2 before we overwrite op1
- op1 = op1->gtOp.gtOp1; // overwrite op1
-
- if (op1->isContainedMemoryOp())
- {
- // use the size andOp1 if it is a contained memoryop.
- cmpAttr = emitTypeSize(op1->TypeGet());
- }
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
- }
- else // op1 is not contained thus it must be in a register
- {
- ins = INS_test;
- op2 = op1; // we will generate "test reg1,reg1"
- // fallthrough to emit->emitInsBinary(ins, cmpAttr, op1, op2);
- }
+ // If the types are different but have the same size then we'll use TYP_INT or TYP_LONG.
+ // This primarily deals with small type mixes (e.g. byte/ubyte) that need to be widened
+ // and compared as int. We should not get long type mixes here but handle that as well
+ // just in case.
+ type = genTypeSize(op1Type) == 8 ? TYP_LONG : TYP_INT;
}
-
- getEmitter()->emitInsBinary(ins, cmpAttr, op1, op2);
+ else
+ {
+        // If the types are different, simply use TYP_INT. This deals with small type/int type
+        // mixes (e.g. byte/short, ubyte/int) that need to be widened and compared as int.
+        // Lowering is expected to handle any mixes that involve long types (e.g. int/long).
+ type = TYP_INT;
+ }
+
+ // The common type cannot be larger than the machine word size
+ assert(genTypeSize(type) <= genTypeSize(TYP_I_IMPL));
+    // The common type cannot be smaller than any of the operand types; if it is, we're probably mixing int/long
+ assert(genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type)));
+    // TYP_UINT and TYP_ULONG should not appear here; only small types can be unsigned
+ assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type));
+ // Small unsigned int types (TYP_BOOL can use anything) should use unsigned comparisons
+ assert(!(varTypeIsSmallInt(type) && varTypeIsUnsigned(type)) || ((tree->gtFlags & GTF_UNSIGNED) != 0));
+    // If op1 is smaller than the common type then it cannot be used from memory; if it is, we're probably missing a cast
+ assert((genTypeSize(op1Type) >= genTypeSize(type)) || !op1->isUsedFromMemory());
+    // If op2 is smaller than the common type then it cannot be used from memory; if it is, we're probably missing a cast
+ assert((genTypeSize(op2Type) >= genTypeSize(type)) || !op2->isUsedFromMemory());
+ // If op2 is a constant then it should fit in the common type
+ assert(!op2->IsCnsIntOrI() || genTypeCanRepresentValue(type, op2->AsIntCon()->IconValue()));
+
+ getEmitter()->emitInsBinary(ins, emitTypeSize(type), op1, op2);
// Are we evaluating this into a register?
if (targetReg != REG_NA)
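The comments above spell out how the rewritten genCompareInt picks the instruction (test vs. cmp) and the common compare type. A minimal standalone sketch of that type rule, assuming int-sized-or-smaller operands; this is illustrative only, not a JIT API:

// Sketch of the common-type rule described above; long/int mixes are expected to be handled in Lowering.
var_types CommonCompareType(var_types op1Type, var_types op2Type)
{
    if (op1Type == op2Type)
    {
        return op1Type; // identical types, possibly small: compare using that type
    }
    if (genTypeSize(op1Type) == genTypeSize(op2Type))
    {
        // same size, different types (e.g. byte/ubyte): widen and compare as int (or long for 8-byte types)
        return (genTypeSize(op1Type) == 8) ? TYP_LONG : TYP_INT;
    }
    return TYP_INT; // different sizes (e.g. byte/int): compare as int
}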
@@ -6810,7 +6776,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
// If not contained, must be a valid float reg.
- if (!op1->isContained())
+ if (op1->isUsedFromReg())
{
assert(genIsValidFloatReg(op1->gtRegNum));
}
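These cast hunks replace blanket !isContained() checks with the more specific operand-source predicates used throughout this diff. A hedged sketch of how codegen dispatches on where an operand's value comes from; the branch comments are my reading of the predicates, not an authoritative definition:

// Illustrative dispatch on an operand's location (predicate names taken from this diff).
if (op->isUsedFromReg())
{
    // The value lives in op->gtRegNum.
}
else if (op->isContainedIntOrIImmed())
{
    // The value is an immediate folded directly into the parent instruction.
}
else if (op->isUsedFromSpillTemp())
{
    // The value is reloaded from a spill temp on the stack.
}
else if (op->isUsedFromMemory())
{
    // The value is read straight from memory, e.g. a contained lclVar or indirection.
}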
@@ -6821,7 +6787,7 @@ void CodeGen::genFloatToFloatCast(GenTreePtr treeNode)
assert(varTypeIsFloating(srcType) && varTypeIsFloating(dstType));
genConsumeOperands(treeNode->AsOp());
- if (srcType == dstType && (!op1->isContained() && (targetReg == op1->gtRegNum)))
+ if (srcType == dstType && (op1->isUsedFromReg() && (targetReg == op1->gtRegNum)))
{
// source and destinations types are the same and also reside in the same register.
// we just need to consume and produce the reg in this case.
@@ -6861,7 +6827,7 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
- if (!op1->isContained())
+ if (op1->isUsedFromReg())
{
assert(genIsValidIntReg(op1->gtRegNum));
}
@@ -6936,7 +6902,7 @@ void CodeGen::genIntToFloatCast(GenTreePtr treeNode)
// If we change the instructions below, FloatingPointUtils::convertUInt64ToDobule
// should be also updated for consistent conversion result.
assert(dstType == TYP_DOUBLE);
- assert(!op1->isContained());
+ assert(op1->isUsedFromReg());
// Set the flags without modifying op1.
// test op1Reg, op1Reg
@@ -6995,7 +6961,7 @@ void CodeGen::genFloatToIntCast(GenTreePtr treeNode)
GenTreePtr op1 = treeNode->gtOp.gtOp1;
#ifdef DEBUG
- if (!op1->isContained())
+ if (op1->isUsedFromReg())
{
assert(genIsValidFloatReg(op1->gtRegNum));
}
@@ -7374,7 +7340,7 @@ void CodeGen::genSSE2BitwiseOp(GenTreePtr treeNode)
// Move operand into targetReg only if the reg reserved for
// internal purpose is not the same as targetReg.
GenTreePtr op1 = treeNode->gtOp.gtOp1;
- assert(!op1->isContained());
+ assert(op1->isUsedFromReg());
regNumber operandReg = genConsumeReg(op1);
if (tmpReg != targetReg)
{
@@ -7497,7 +7463,7 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
#ifdef _TARGET_X86_
//---------------------------------------------------------------------
-// adjustStackForPutArgStk:
+// genAdjustStackForPutArgStk:
// adjust the stack pointer for a putArgStk node if necessary.
//
// Arguments:
@@ -7505,6 +7471,12 @@ unsigned CodeGen::getBaseVarForPutArgStk(GenTreePtr treeNode)
//
// Returns: true if the stack pointer was adjusted; false otherwise.
//
+// Notes:
+// Sets `m_pushStkArg` to true if the stack arg needs to be pushed,
+// false if the stack arg needs to be stored at the current stack
+// pointer address. This is exactly the opposite of the return value
+// of this function.
+//
bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
{
#ifdef FEATURE_SIMD
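The Notes above tie m_pushStkArg to the return value. A minimal sketch of the invariant a caller can rely on, mirroring the wording of the Notes (illustrative only):

// After the call, m_pushStkArg is exactly the negation of the return value.
const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
assert(m_pushStkArg == !preAdjustedStack);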
@@ -7562,11 +7534,10 @@ bool CodeGen::genAdjustStackForPutArgStk(GenTreePutArgStk* putArgStk)
}
//---------------------------------------------------------------------
-// genPutArgStkFieldList - generate code for passing an arg on the stack.
+// genPutArgStkFieldList - generate code for passing a GT_FIELD_LIST arg on the stack.
//
// Arguments
-// treeNode - the GT_PUTARG_STK node
-// targetType - the type of the treeNode
+// treeNode - the GT_PUTARG_STK node whose op1 is a GT_FIELD_LIST
//
// Return value:
// None
@@ -7578,24 +7549,36 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// Set m_pushStkArg and pre-adjust the stack if necessary.
const bool preAdjustedStack = genAdjustStackForPutArgStk(putArgStk);
+
// For now, we only support the "push" case; we will push a full slot for the first field of each slot
// within the struct.
assert((putArgStk->isPushKind()) && !preAdjustedStack && m_pushStkArg);
- // If we have pre-adjusted the stack and are simply storing the fields in order) set the offset to 0.
+ // If we have pre-adjusted the stack and are simply storing the fields in order, set the offset to 0.
// (Note that this mode is not currently being used.)
// If we are pushing the arguments (i.e. we have not pre-adjusted the stack), then we are pushing them
// in reverse order, so we start with the current field offset at the size of the struct arg (which must be
// a multiple of the target pointer size).
unsigned currentOffset = (preAdjustedStack) ? 0 : putArgStk->getArgSize();
unsigned prevFieldOffset = currentOffset;
- regNumber tmpReg = REG_NA;
+ regNumber intTmpReg = REG_NA;
+ regNumber simdTmpReg = REG_NA;
if (putArgStk->gtRsvdRegs != RBM_NONE)
{
- assert(genCountBits(putArgStk->gtRsvdRegs) == 1);
- tmpReg = genRegNumFromMask(putArgStk->gtRsvdRegs);
- assert(genIsValidIntReg(tmpReg));
+ regMaskTP rsvdRegs = putArgStk->gtRsvdRegs;
+ if ((rsvdRegs & RBM_ALLINT) != 0)
+ {
+ intTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLINT);
+ assert(genIsValidIntReg(intTmpReg));
+ }
+ if ((rsvdRegs & RBM_ALLFLOAT) != 0)
+ {
+ simdTmpReg = genRegNumFromMask(rsvdRegs & RBM_ALLFLOAT);
+ assert(genIsValidFloatReg(simdTmpReg));
+ }
+ assert(genCountBits(rsvdRegs) == (unsigned)((intTmpReg == REG_NA) ? 0 : 1) + ((simdTmpReg == REG_NA) ? 0 : 1));
}
+
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
GenTree* const fieldNode = current->Current();
@@ -7612,7 +7595,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// assigned a register, and which is therefore contained.
// Unlike genConsumeReg(), it handles the case where no registers are being consumed.
genConsumeRegs(fieldNode);
- regNumber argReg = fieldNode->isContainedSpillTemp() ? REG_NA : fieldNode->gtRegNum;
+ regNumber argReg = fieldNode->isUsedFromSpillTemp() ? REG_NA : fieldNode->gtRegNum;
// If the field is slot-like, we can use a push instruction to store the entire register no matter the type.
//
@@ -7623,7 +7606,7 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
// able to detect stores into the outgoing argument area of the stack on x86.
const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevFieldOffset - fieldOffset) >= 4);
int adjustment = roundUp(currentOffset - fieldOffset, 4);
- if (fieldIsSlot)
+ if (fieldIsSlot && !varTypeIsSIMD(fieldType))
{
fieldType = genActualType(fieldType);
unsigned pushSize = genTypeSize(fieldType);
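A small worked example of the fieldIsSlot test above. The offsets are made up for illustration, and fields are assumed to be visited from the highest offset down, which is what the descending prevFieldOffset bookkeeping implies:

// Hypothetical 8-byte struct: int at offset 4, short at offset 2, short at offset 0.
//   int   at 4: (4 % 4 == 0) && (8 - 4 >= 4) -> slot-like, push a full 4-byte slot.
//   short at 2: (2 % 4 != 0)                 -> not slot-like, store from a register instead of pushing.
//   short at 0: (0 % 4 == 0) but (2 - 0 < 4) -> not slot-like either, since it shares its slot with the
//                                               field at offset 2 and a full push would overwrite it.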
@@ -7641,12 +7624,13 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
else
{
m_pushStkArg = false;
+
// We always "push" floating point fields (i.e. they are full slot values that don't
// require special handling).
- assert(varTypeIsIntegralOrI(fieldNode));
+ assert(varTypeIsIntegralOrI(fieldNode) || varTypeIsSIMD(fieldNode));
+
// If we can't push this field, it needs to be in a register so that we can store
// it to the stack location.
- assert(tmpReg != REG_NA);
if (adjustment != 0)
{
// This moves the stack pointer to fieldOffset.
@@ -7658,15 +7642,16 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
// Does it need to be in a byte register?
- // If so, we'll use tmpReg, which must have been allocated as a byte register.
+ // If so, we'll use intTmpReg, which must have been allocated as a byte register.
// If it's already in a register, but not a byteable one, then move it.
if (varTypeIsByte(fieldType) && ((argReg == REG_NA) || ((genRegMask(argReg) & RBM_BYTE_REGS) == 0)))
{
- noway_assert((genRegMask(tmpReg) & RBM_BYTE_REGS) != 0);
+ assert(intTmpReg != REG_NA);
+ noway_assert((genRegMask(intTmpReg) & RBM_BYTE_REGS) != 0);
if (argReg != REG_NA)
{
- inst_RV_RV(INS_mov, tmpReg, argReg, fieldType);
- argReg = tmpReg;
+ inst_RV_RV(INS_mov, intTmpReg, argReg, fieldType);
+ argReg = intTmpReg;
}
}
}
@@ -7675,8 +7660,9 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
{
if (m_pushStkArg)
{
- if (fieldNode->isContainedSpillTemp())
+ if (fieldNode->isUsedFromSpillTemp())
{
+ assert(!varTypeIsSIMD(fieldType)); // Q: can we get here with SIMD?
assert(fieldNode->IsRegOptional());
TempDsc* tmp = getSpillTempDsc(fieldNode);
getEmitter()->emitIns_S(INS_push, emitActualTypeSize(fieldNode->TypeGet()), tmp->tdTempNum(), 0);
@@ -7709,25 +7695,35 @@ void CodeGen::genPutArgStkFieldList(GenTreePutArgStk* putArgStk)
}
else
{
- // The stack has been adjusted and we will load the field to tmpReg and then store it on the stack.
+ // The stack has been adjusted and we will load the field to intTmpReg and then store it on the stack.
assert(varTypeIsIntegralOrI(fieldNode));
switch (fieldNode->OperGet())
{
case GT_LCL_VAR:
- inst_RV_TT(INS_mov, tmpReg, fieldNode);
+ inst_RV_TT(INS_mov, intTmpReg, fieldNode);
break;
case GT_CNS_INT:
- genSetRegToConst(tmpReg, fieldNode->TypeGet(), fieldNode);
+ genSetRegToConst(intTmpReg, fieldNode->TypeGet(), fieldNode);
break;
default:
unreached();
}
- genStoreRegToStackArg(fieldType, tmpReg, fieldOffset - currentOffset);
+ genStoreRegToStackArg(fieldType, intTmpReg, fieldOffset - currentOffset);
}
}
else
{
- genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+#if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+ if (fieldType == TYP_SIMD12)
+ {
+ assert(genIsValidFloatReg(simdTmpReg));
+ genStoreSIMD12ToStack(argReg, simdTmpReg);
+ }
+ else
+#endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+ {
+ genStoreRegToStackArg(fieldType, argReg, fieldOffset - currentOffset);
+ }
if (m_pushStkArg)
{
// We always push a slot-rounded size
@@ -7762,13 +7758,15 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
#ifdef _TARGET_X86_
-#ifdef FEATURE_SIMD
- if (targetType == TYP_SIMD12)
+#if defined(UNIX_X86_ABI)
+    // For each call, the first stack argument carries the padding required for alignment;
+    // if this value is not zero, use it to adjust ESP.
+ unsigned argPadding = putArgStk->getArgPadding();
+ if (argPadding != 0)
{
- genPutArgStkSIMD12(putArgStk);
- return;
+ inst_RV_IV(INS_sub, REG_SPBASE, argPadding * TARGET_POINTER_SIZE, EA_PTRSIZE);
}
-#endif // FEATURE_SIMD
+#endif
if (varTypeIsStruct(targetType))
{
@@ -7782,9 +7780,9 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr data = putArgStk->gtOp1;
- // On a 32-bit target, all of the long arguments have been decomposed into
- // a separate putarg_stk for each of the upper and lower halves.
- noway_assert(targetType != TYP_LONG);
+ // On a 32-bit target, all of the long arguments are handled with GT_FIELD_LIST,
+ // and the type of the putArgStk is TYP_VOID.
+ assert(targetType != TYP_LONG);
const unsigned argSize = putArgStk->getArgSize();
assert((argSize % TARGET_POINTER_SIZE) == 0);
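For the UNIX_X86_ABI padding above, a small worked example. The 16-byte call-site stack alignment is my assumption about that ABI and is not stated in this diff:

// Suppose a call passes three 4-byte stack arguments and the ABI wants ESP 16-byte aligned at the call.
// 12 bytes of arguments leave the stack 4 bytes short of alignment, so the first stack argument gets
// argPadding == 1 and, before any pushes, codegen emits:
//     sub esp, 4      ; argPadding * TARGET_POINTER_SIZE
// The same count is used after the call to pop the padding back off (see fgArgInfo::padStkAlign in compiler.h).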
@@ -7808,7 +7806,7 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
else
{
// We should not see any contained nodes that are not immediates.
- assert(!data->isContained());
+ assert(data->isUsedFromReg());
genConsumeReg(data);
genPushReg(targetType, data->gtRegNum);
}
@@ -7844,13 +7842,14 @@ void CodeGen::genPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr data = putArgStk->gtOp1;
- if (data->isContained())
+ if (data->isContainedIntOrIImmed())
{
getEmitter()->emitIns_S_I(ins_Store(targetType), emitTypeSize(targetType), baseVarNum, argOffset,
(int)data->AsIntConCommon()->IconValue());
}
else
{
+ assert(data->isUsedFromReg());
genConsumeReg(data);
getEmitter()->emitIns_S_R(ins_Store(targetType), emitTypeSize(targetType), data->gtRegNum, baseVarNum,
argOffset);
@@ -7996,6 +7995,14 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
var_types targetType = putArgStk->TypeGet();
+#if defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+ if (targetType == TYP_SIMD12)
+ {
+ genPutArgStkSIMD12(putArgStk);
+ return;
+ }
+#endif // defined(_TARGET_X86_) && defined(FEATURE_SIMD)
+
if (varTypeIsSIMD(targetType))
{
regNumber srcReg = genConsumeReg(putArgStk->gtGetOp1());
@@ -8078,7 +8085,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
slotAttr = EA_BYREF;
}
- const unsigned offset = i * 4;
+ const unsigned offset = i * TARGET_POINTER_SIZE;
if (srcAddrInReg)
{
getEmitter()->emitIns_AR_R(INS_push, slotAttr, REG_NA, srcRegNum, offset);
@@ -8087,7 +8094,7 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
{
getEmitter()->emitIns_S(INS_push, slotAttr, srcLclNum, srcLclOffset + offset);
}
- genStackLevel += 4;
+ genStackLevel += TARGET_POINTER_SIZE;
}
#else // !defined(_TARGET_X86_)
@@ -8175,11 +8182,11 @@ void CodeGen::genPutStructArgStk(GenTreePutArgStk* putArgStk)
*
* Create and record GC Info for the function.
*/
-#ifdef _TARGET_AMD64_
+#ifndef JIT32_GCENCODER
void
-#else // !_TARGET_AMD64_
+#else // !JIT32_GCENCODER
void*
-#endif // !_TARGET_AMD64_
+#endif // !JIT32_GCENCODER
CodeGen::genCreateAndStoreGCInfo(unsigned codeSize, unsigned prologSize, unsigned epilogSize DEBUGARG(void* codePtr))
{
#ifdef JIT32_GCENCODER
@@ -8381,6 +8388,14 @@ void CodeGen::genCreateAndStoreGCInfoX64(unsigned codeSize, unsigned prologSize
gcInfoEncoder->SetSizeOfEditAndContinuePreservedArea(preservedAreaSize);
}
+ if (compiler->opts.IsReversePInvoke())
+ {
+ unsigned reversePInvokeFrameVarNumber = compiler->lvaReversePInvokeFrameVar;
+ assert(reversePInvokeFrameVarNumber != BAD_VAR_NUM && reversePInvokeFrameVarNumber < compiler->lvaRefCount);
+ LclVarDsc& reversePInvokeFrameVar = compiler->lvaTable[reversePInvokeFrameVarNumber];
+ gcInfoEncoder->SetReversePInvokeFrameSlot(reversePInvokeFrameVar.lvStkOffs);
+ }
+
gcInfoEncoder->Build();
// GC Encoder automatically puts the GC info in the right spot using ICorJitInfo::allocGCInfo(size_t)
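The hunk above records the reverse P/Invoke frame variable's stack offset in the GC info when the method is a reverse P/Invoke. A minimal sketch of the guard it relies on, restating only what the added code already checks (illustrative):

// Only methods compiled as reverse P/Invoke have lvaReversePInvokeFrameVar allocated,
// so the slot is reported to the encoder exclusively under that option.
if (compiler->opts.IsReversePInvoke())
{
    assert(compiler->lvaReversePInvokeFrameVar != BAD_VAR_NUM);
}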
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index 114847c0d0..01c7f8d6a7 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -1635,18 +1635,16 @@ void Compiler::compDisplayStaticSizes(FILE* fout)
sizeof(bbDummy->bbVarUse));
fprintf(fout, "Offset / size of bbVarDef = %3u / %3u\n", offsetof(BasicBlock, bbVarDef),
sizeof(bbDummy->bbVarDef));
- fprintf(fout, "Offset / size of bbVarTmp = %3u / %3u\n", offsetof(BasicBlock, bbVarTmp),
- sizeof(bbDummy->bbVarTmp));
fprintf(fout, "Offset / size of bbLiveIn = %3u / %3u\n", offsetof(BasicBlock, bbLiveIn),
sizeof(bbDummy->bbLiveIn));
fprintf(fout, "Offset / size of bbLiveOut = %3u / %3u\n", offsetof(BasicBlock, bbLiveOut),
sizeof(bbDummy->bbLiveOut));
- fprintf(fout, "Offset / size of bbHeapSsaPhiFunc = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaPhiFunc),
- sizeof(bbDummy->bbHeapSsaPhiFunc));
- fprintf(fout, "Offset / size of bbHeapSsaNumIn = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumIn),
- sizeof(bbDummy->bbHeapSsaNumIn));
- fprintf(fout, "Offset / size of bbHeapSsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbHeapSsaNumOut),
- sizeof(bbDummy->bbHeapSsaNumOut));
+ fprintf(fout, "Offset / size of bbMemorySsaPhiFunc = %3u / %3u\n", offsetof(BasicBlock, bbMemorySsaPhiFunc),
+ sizeof(bbDummy->bbMemorySsaPhiFunc));
+ fprintf(fout, "Offset / size of bbMemorySsaNumIn = %3u / %3u\n", offsetof(BasicBlock, bbMemorySsaNumIn),
+ sizeof(bbDummy->bbMemorySsaNumIn));
+ fprintf(fout, "Offset / size of bbMemorySsaNumOut = %3u / %3u\n", offsetof(BasicBlock, bbMemorySsaNumOut),
+ sizeof(bbDummy->bbMemorySsaNumOut));
fprintf(fout, "Offset / size of bbScope = %3u / %3u\n", offsetof(BasicBlock, bbScope),
sizeof(bbDummy->bbScope));
fprintf(fout, "Offset / size of bbCseGen = %3u / %3u\n", offsetof(BasicBlock, bbCseGen),
@@ -1788,9 +1786,9 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
impSpillCliquePredMembers = ExpandArray<BYTE>(getAllocator());
impSpillCliqueSuccMembers = ExpandArray<BYTE>(getAllocator());
- memset(&lvHeapPerSsaData, 0, sizeof(PerSsaArray));
- lvHeapPerSsaData.Init(getAllocator());
- lvHeapNumSsaNames = 0;
+ memset(&lvMemoryPerSsaData, 0, sizeof(PerSsaArray));
+ lvMemoryPerSsaData.Init(getAllocator());
+ lvMemoryNumSsaNames = 0;
//
// Initialize all the per-method statistics gathering data structures.
@@ -1871,8 +1869,11 @@ void Compiler::compInit(ArenaAllocator* pAlloc, InlineInfo* inlineInfo)
m_fieldSeqStore = nullptr;
m_zeroOffsetFieldMap = nullptr;
m_arrayInfoMap = nullptr;
- m_heapSsaMap = nullptr;
m_refAnyClass = nullptr;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ m_memorySsaMap[memoryKind] = nullptr;
+ }
#ifdef DEBUG
if (!compIsForInlining())
@@ -2312,6 +2313,9 @@ void Compiler::compSetProcessor()
if (opts.compCanUseAVX)
{
codeGen->getEmitter()->SetUseAVX(true);
+        // Initially assume that the JITted method contains no AVX instructions
+ codeGen->getEmitter()->SetContainsAVX(false);
+ codeGen->getEmitter()->SetContains256bitAVX(false);
}
else
#endif // FEATURE_AVX_SUPPORT
@@ -3024,6 +3028,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
#ifdef FEATURE_SIMD
// Minimum bar for availing SIMD benefits is SSE2 on AMD64/x86.
featureSIMD = jitFlags->IsSet(JitFlags::JIT_FLAG_FEATURE_SIMD);
+ setUsesSIMDTypes(false);
#endif // FEATURE_SIMD
if (compIsForInlining() || compIsForImportOnly())
@@ -3296,8 +3301,6 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
}
#endif
- opts.compMustInlinePInvokeCalli = jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB);
-
opts.compScopeInfo = opts.compDbgInfo;
#ifdef LATE_DISASM
@@ -4194,11 +4197,17 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
assert(!fgComputePredsDone);
if (fgCheapPredsValid)
{
- // Remove cheap predecessors before inlining; allowing the cheap predecessor lists to be inserted
- // with inlined blocks causes problems.
+ // Remove cheap predecessors before inlining and fat call transformation;
+ // allowing the cheap predecessor lists to be inserted causes problems
+ // with splitting existing blocks.
fgRemovePreds();
}
+ if (IsTargetAbi(CORINFO_CORERT_ABI) && doesMethodHaveFatPointer())
+ {
+ fgTransformFatCalli();
+ }
+
EndPhase(PHASE_IMPORTATION);
if (compIsForInlining())
@@ -4598,6 +4607,10 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags
codeGen->regSet.rsMaskResvd |= RBM_OPT_RSVD;
assert(REG_OPT_RSVD != REG_FP);
}
+ // compRsvdRegCheck() has read out the FramePointerUsed property, but doLinearScan()
+ // tries to overwrite it later. This violates the PhasedVar rule and triggers an assertion.
+ // TODO-ARM-Bug?: What is the proper way to handle this situation?
+ codeGen->resetFramePointerUsedWritePhase();
#ifdef DEBUG
//
@@ -4718,21 +4731,6 @@ void Compiler::ResetOptAnnotations()
tree->ClearVN();
tree->ClearAssertion();
tree->gtCSEnum = NO_CSE;
-
- // Clear any *_ASG_LHS flags -- these are set during SSA construction,
- // and the heap live-in calculation depends on them being unset coming
- // into SSA construction (without clearing them, a block that has a
- // heap def via one of these before any heap use is treated as not having
- // an upwards-exposed heap use, even though subsequent heap uses may not
- // be killed by the store; this seems to be a bug, worked around here).
- if (tree->OperIsIndir())
- {
- tree->gtFlags &= ~GTF_IND_ASG_LHS;
- }
- else if (tree->OperGet() == GT_CLS_VAR)
- {
- tree->gtFlags &= ~GTF_CLS_VAR_ASG_LHS;
- }
}
}
}
@@ -6708,16 +6706,7 @@ Compiler::NodeToIntMap* Compiler::FindReachableNodesInNodeTestData()
if (arg->gtFlags & GTF_LATE_ARG)
{
// Find the corresponding late arg.
- GenTreePtr lateArg = nullptr;
- for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
- {
- if (call->fgArgInfo->ArgTable()[j]->argNum == i)
- {
- lateArg = call->fgArgInfo->ArgTable()[j]->node;
- break;
- }
- }
- assert(lateArg != nullptr);
+ GenTreePtr lateArg = call->fgArgInfo->GetLateArg(i);
if (GetNodeTestData()->Lookup(lateArg, &tlAndN))
{
reachable->Set(lateArg, 0);
@@ -6805,14 +6794,14 @@ void Compiler::CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to)
assert(to->gtOp.gtOp1 == nullptr);
}
- if (from->gtGetOp2() != nullptr)
+ if (from->gtGetOp2IfPresent() != nullptr)
{
- assert(to->gtGetOp2() != nullptr);
+ assert(to->gtGetOp2IfPresent() != nullptr);
CopyTestDataToCloneTree(from->gtGetOp2(), to->gtGetOp2());
}
else
{
- assert(to->gtGetOp2() == nullptr);
+ assert(to->gtGetOp2IfPresent() == nullptr);
}
return;
@@ -6863,8 +6852,8 @@ void Compiler::CopyTestDataToCloneTree(GenTreePtr from, GenTreePtr to)
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- CopyTestDataToCloneTree(from->gtBoundsChk.gtArrLen, to->gtBoundsChk.gtArrLen);
CopyTestDataToCloneTree(from->gtBoundsChk.gtIndex, to->gtBoundsChk.gtIndex);
+ CopyTestDataToCloneTree(from->gtBoundsChk.gtArrLen, to->gtBoundsChk.gtArrLen);
return;
default:
@@ -9175,10 +9164,6 @@ int cTreeFlagsIR(Compiler* comp, GenTree* tree)
{
chars += printf("[RELOP_QMARK]");
}
- if (tree->gtFlags & GTF_RELOP_SMALL)
- {
- chars += printf("[RELOP_SMALL]");
- }
break;
case GT_QMARK:
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index d8cd491063..4239cf613b 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -268,10 +268,6 @@ public:
unsigned char lvDisqualify : 1; // variable is no longer OK for add copy optimization
unsigned char lvVolatileHint : 1; // hint for AssertionProp
#endif
-#if FANCY_ARRAY_OPT
- unsigned char lvAssignOne : 1; // assigned at least once?
- unsigned char lvAssignTwo : 1; // assigned at least twice?
-#endif
unsigned char lvSpilled : 1; // enregistered variable was spilled
#ifndef _TARGET_64BIT_
@@ -322,6 +318,7 @@ public:
// type of an arg node is TYP_BYREF and a local node is TYP_SIMD*.
unsigned char lvSIMDType : 1; // This is a SIMD struct
unsigned char lvUsedInSIMDIntrinsic : 1; // This tells lclvar is used for simd intrinsic
+ var_types lvBaseType : 5; // Note: this only packs because var_types is a typedef of unsigned char
#endif // FEATURE_SIMD
unsigned char lvRegStruct : 1; // This is a reg-sized non-field-addressed struct.
@@ -330,9 +327,6 @@ public:
// local.
unsigned lvParentLcl; // The index of the local var representing the parent (i.e. the promoted struct local).
// Valid on promoted struct local fields.
-#ifdef FEATURE_SIMD
- var_types lvBaseType; // The base type of a SIMD local var. Valid on TYP_SIMD locals.
-#endif // FEATURE_SIMD
};
unsigned char lvFieldCnt; // Number of fields in the promoted VarDsc.
@@ -676,7 +670,7 @@ public:
#endif // defined(_TARGET_64BIT_)
}
- unsigned lvSize() // Size needed for storage representation. Only used for structs or TYP_BLK.
+ unsigned lvSize() const // Size needed for storage representation. Only used for structs or TYP_BLK.
{
// TODO-Review: Sometimes we get called on ARM with HFA struct variables that have been promoted,
// where the struct itself is no longer used because all access is via its member fields.
@@ -694,7 +688,8 @@ public:
#if defined(FEATURE_SIMD) && !defined(_TARGET_64BIT_)
// For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. We can't do
- // this for arguments, which must be passed according the defined ABI.
+    // this for arguments, which must be passed according to the defined ABI. We don't want to do this for
+    // dependently promoted struct fields, but we don't know that here. See lvaMapSimd12ToSimd16().
if ((lvType == TYP_SIMD12) && !lvIsParam)
{
assert(lvExactSize == 12);
@@ -711,10 +706,6 @@ public:
BYTE* lvGcLayout; // GC layout info for structs
-#if FANCY_ARRAY_OPT
- GenTreePtr lvKnownDim; // array size if known
-#endif
-
#if ASSERTION_PROP
BlockSet lvRefBlks; // Set of blocks that contain refs
GenTreePtr lvDefStmt; // Pointer to the statement with the single definition
@@ -1195,6 +1186,11 @@ struct fgArgTabEntry
unsigned alignment; // 1 or 2 (slots/registers)
unsigned lateArgInx; // index into gtCallLateArgs list
unsigned tmpNum; // the LclVar number if we had to force evaluation of this arg
+#if defined(UNIX_X86_ABI)
+    unsigned padStkAlign; // Number of padding slots for stack alignment. For each call, only the first
+                          // stack argument may carry a non-zero value, which is used to emit "sub esp, n"
+                          // to adjust the stack before pushing the argument.
+#endif
bool isSplit : 1; // True when this argument is split between the registers and OutArg area
bool needTmp : 1; // True when we force this argument's evaluation into a temp LclVar
@@ -1272,6 +1268,10 @@ class fgArgInfo
unsigned argCount; // Updatable arg count value
unsigned nextSlotNum; // Updatable slot count value
unsigned stkLevel; // Stack depth when we make this call (for x86)
+#if defined(UNIX_X86_ABI)
+    unsigned padStkAlign;  // Number of padding slots for stack alignment. This value is used to restore the
+                           // stack pointer after each call, removing the alignment padding added before it.
+#endif
unsigned argTableSize; // size of argTable array (equal to the argCount when done with fgMorphArgs)
bool hasRegArgs; // true if we have one or more register arguments
@@ -1321,6 +1321,10 @@ public:
void ArgsComplete();
+#if defined(UNIX_X86_ABI)
+ void ArgsAlignPadding();
+#endif
+
void SortArgs();
void EvalArgsToTemps();
@@ -1340,6 +1344,12 @@ public:
{
return nextSlotNum;
}
+#if defined(UNIX_X86_ABI)
+ unsigned GetPadStackAlign()
+ {
+ return padStkAlign;
+ }
+#endif
bool HasRegArgs()
{
return hasRegArgs;
@@ -1352,6 +1362,9 @@ public:
{
return argsComplete;
}
+
+ // Get the late arg for arg at position argIndex. Caller must ensure this position has a late arg.
+ GenTreePtr GetLateArg(unsigned argIndex);
};
#ifdef DEBUG
@@ -1771,7 +1784,11 @@ public:
// a PSPSym for functions with any EH.
bool ehNeedsPSPSym() const
{
+#ifdef _TARGET_X86_
+ return false;
+#else // _TARGET_X86_
return compHndBBtabCount > 0;
+#endif // _TARGET_X86_
}
bool ehAnyFunclets(); // Are there any funclets in this function?
@@ -1936,6 +1953,11 @@ public:
GenTreePtr gtNewOneConNode(var_types type);
+#ifdef FEATURE_SIMD
+ GenTreePtr gtNewSIMDVectorZero(var_types simdType, var_types baseType, unsigned size);
+ GenTreePtr gtNewSIMDVectorOne(var_types simdType, var_types baseType, unsigned size);
+#endif
+
GenTreeBlk* gtNewBlkOpNode(
genTreeOps oper, GenTreePtr dst, GenTreePtr srcOrFillVal, GenTreePtr sizeOrClsTok, bool isVolatile);
@@ -1981,6 +2003,7 @@ public:
SIMDIntrinsicID simdIntrinsicID,
var_types baseType,
unsigned size);
+ void SetOpLclRelatedToSIMDIntrinsic(GenTreePtr op);
#endif
GenTreePtr gtNewLclLNode(unsigned lnum, var_types type, IL_OFFSETX ILoffs = BAD_IL_OFFSET);
@@ -2063,13 +2086,13 @@ public:
bool gtHasLocalsWithAddrOp(GenTreePtr tree);
- unsigned gtHashValue(GenTree* tree);
-
unsigned gtSetListOrder(GenTree* list, bool regs, bool isListCallArgs);
void gtWalkOp(GenTree** op1, GenTree** op2, GenTree* adr, bool constOnly);
#ifdef DEBUG
+ unsigned gtHashValue(GenTree* tree);
+
GenTreePtr gtWalkOpEffectiveVal(GenTreePtr op);
#endif
@@ -2653,6 +2676,35 @@ public:
bool lvaIsFieldOfDependentlyPromotedStruct(const LclVarDsc* varDsc);
bool lvaIsGCTracked(const LclVarDsc* varDsc);
+#if defined(FEATURE_SIMD)
+ bool lvaMapSimd12ToSimd16(const LclVarDsc* varDsc)
+ {
+ assert(varDsc->lvType == TYP_SIMD12);
+ assert(varDsc->lvExactSize == 12);
+
+#if defined(_TARGET_64BIT_)
+ assert(varDsc->lvSize() == 16);
+ return true;
+#else // !defined(_TARGET_64BIT_)
+
+ // For 32-bit architectures, we make local variable SIMD12 types 16 bytes instead of just 12. lvSize()
+ // already does this calculation. However, we also need to prevent mapping types if the var is a
+    // dependently promoted struct field, which must remain its exact size within its parent struct.
+    // Unfortunately, we don't know this until late, so we may have already pretended the field is bigger
+    // before that.
+ if ((varDsc->lvSize() == 16) && !lvaIsFieldOfDependentlyPromotedStruct(varDsc))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+
+#endif // !defined(_TARGET_64BIT_)
+ }
+#endif // defined(FEATURE_SIMD)
+
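A hedged sketch of how a caller might use the new helper when sizing a TYP_SIMD12 local; the wrapper function shown here is hypothetical and only illustrates the intended query:

// Hypothetical helper: pick the storage size for a TYP_SIMD12 local.
unsigned simd12StorageSize(Compiler* comp, const LclVarDsc* varDsc)
{
    assert(varDsc->lvType == TYP_SIMD12);
    // Widen to 16 bytes only when the mapping is legal, i.e. the local is not a dependently
    // promoted field that must keep its exact 12-byte size inside its parent struct.
    return comp->lvaMapSimd12ToSimd16(varDsc) ? 16 : 12;
}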
BYTE* lvaGetGcLayout(unsigned varNum);
bool lvaTypeIsGC(unsigned varNum);
unsigned lvaGSSecurityCookie; // LclVar number
@@ -2697,21 +2749,21 @@ protected:
static fgWalkPreFn lvaMarkLclRefsCallback;
void lvaMarkLclRefs(GenTreePtr tree);
- // Keeps the mapping from SSA #'s to VN's for the implicit "Heap" variable.
- PerSsaArray lvHeapPerSsaData;
- unsigned lvHeapNumSsaNames;
+ // Keeps the mapping from SSA #'s to VN's for the implicit memory variables.
+ PerSsaArray lvMemoryPerSsaData;
+ unsigned lvMemoryNumSsaNames;
public:
- // Returns the address of the per-Ssa data for "Heap" at the given ssaNum (which is required
+ // Returns the address of the per-Ssa data for memory at the given ssaNum (which is required
// not to be the SsaConfig::RESERVED_SSA_NUM, which indicates that the variable is
// not an SSA variable).
- LclSsaVarDsc* GetHeapPerSsaData(unsigned ssaNum)
+ LclSsaVarDsc* GetMemoryPerSsaData(unsigned ssaNum)
{
assert(ssaNum != SsaConfig::RESERVED_SSA_NUM);
assert(SsaConfig::RESERVED_SSA_NUM == 0);
ssaNum--;
- assert(ssaNum < lvHeapNumSsaNames);
- return &lvHeapPerSsaData.GetRef(ssaNum);
+ assert(ssaNum < lvMemoryNumSsaNames);
+ return &lvMemoryPerSsaData.GetRef(ssaNum);
}
/*
@@ -2780,7 +2832,7 @@ protected:
void impImportNewObjArray(CORINFO_RESOLVED_TOKEN* pResolvedToken, CORINFO_CALL_INFO* pCallInfo);
- bool impCanPInvokeInline(BasicBlock* block);
+ bool impCanPInvokeInline();
bool impCanPInvokeInlineCallSite(BasicBlock* block);
void impCheckForPInvokeCall(
GenTreePtr call, CORINFO_METHOD_HANDLE methHnd, CORINFO_SIG_INFO* sig, unsigned mflags, BasicBlock* block);
@@ -2831,7 +2883,8 @@ protected:
void impImportLeave(BasicBlock* block);
void impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr);
- GenTreePtr impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+ GenTreePtr impIntrinsic(GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
int memberRef,
@@ -3425,6 +3478,7 @@ public:
bool fgComputePredsDone; // Have we computed the bbPreds list
bool fgCheapPredsValid; // Is the bbCheapPreds list valid?
bool fgDomsComputed; // Have we computed the dominator sets?
+ bool fgOptimizedFinally; // Did we optimize any try-finallys?
bool fgHasSwitch; // any BBJ_SWITCH jumps?
bool fgHasPostfix; // any postfix ++/-- found?
@@ -3493,8 +3547,20 @@ public:
void fgImport();
+ void fgTransformFatCalli();
+
void fgInline();
+ void fgRemoveEmptyTry();
+
+ void fgRemoveEmptyFinally();
+
+ void fgCloneFinally();
+
+ void fgCleanupContinuation(BasicBlock* continuation);
+
+ void fgUpdateFinallyTargetFlags();
+
GenTreePtr fgGetCritSectOfStaticMethod();
#if !defined(_TARGET_X86_)
@@ -3570,10 +3636,9 @@ public:
void fgLocalVarLivenessInit();
#ifdef LEGACY_BACKEND
- GenTreePtr fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, GenTreePtr relopNode, GenTreePtr asgdLclVar);
+ GenTreePtr fgLegacyPerStatementLocalVarLiveness(GenTreePtr startNode, GenTreePtr relopNode);
#else
- void fgPerNodeLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
- void fgPerStatementLocalVarLiveness(GenTree* node, GenTree* asgdLclVar);
+ void fgPerNodeLocalVarLiveness(GenTree* node);
#endif
void fgPerBlockLocalVarLiveness();
@@ -3741,18 +3806,18 @@ public:
// tree node).
void fgValueNumber();
- // Updates "fgCurHeap" via the assignment H[elemTypeEq][arrVN][inx][fldSeq] = rhsVN.
+ // Computes new GcHeap VN via the assignment H[elemTypeEq][arrVN][inx][fldSeq] = rhsVN.
// Assumes that "elemTypeEq" is the (equivalence class rep) of the array element type.
// The 'indType' is the indirection type of the lhs of the assignment and will typically
// match the element type of the array or fldSeq. When this type doesn't match
// or if the fldSeq is 'NotAField' we invalidate the array contents H[elemTypeEq][arrVN]
//
- void fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
- ValueNum arrVN,
- ValueNum inxVN,
- FieldSeqNode* fldSeq,
- ValueNum rhsVN,
- var_types indType);
+ ValueNum fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType);
// Requires that "tree" is a GT_IND marked as an array index, and that its address argument
// has been parsed to yield the other input arguments. If evaluation of the address
@@ -3772,33 +3837,43 @@ public:
// Requires "funcApp" to be a VNF_PtrToArrElem, and "addrXvn" to represent the exception set thrown
// by evaluating the array index expression "tree". Returns the value number resulting from
- // dereferencing the array in the current heap state. If "tree" is non-null, it must be the
+ // dereferencing the array in the current GcHeap state. If "tree" is non-null, it must be the
// "GT_IND" that does the dereference, and it is given the returned value number.
ValueNum fgValueNumberArrIndexVal(GenTreePtr tree, struct VNFuncApp* funcApp, ValueNum addrXvn);
+ // Compute the value number for a byref-exposed load of the given type via the given pointerVN.
+ ValueNum fgValueNumberByrefExposedLoad(var_types type, ValueNum pointerVN);
+
unsigned fgVNPassesCompleted; // Number of times fgValueNumber has been run.
// Utility functions for fgValueNumber.
- // Perform value-numbering for the trees in "blk". When giving VN's to the SSA
- // names defined by phi definitions at the start of "blk", "newVNsForPhis" indicates
- // that these should be given new VN's, irrespective of the values of the LHS.
- // If "false", then we may assume that all inputs to phi RHS's of such definitions
- // have already been assigned value numbers; if they are all assigned the *same* value
- // number, then the LHS SSA name gets the same VN.
- void fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis);
+ // Perform value-numbering for the trees in "blk".
+ void fgValueNumberBlock(BasicBlock* blk);
// Requires that "entryBlock" is the entry block of loop "loopNum", and that "loopNum" is the
// innermost loop of which "entryBlock" is the entry. Returns the value number that should be
- // assumed for the heap at the start "entryBlk".
- ValueNum fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned loopNum);
+    // assumed for the memoryKind at the start of "entryBlk".
+ ValueNum fgMemoryVNForLoopSideEffects(MemoryKind memoryKind, BasicBlock* entryBlock, unsigned loopNum);
- // Called when an operation (performed by "tree", described by "msg") may cause the global Heap to be mutated.
- void fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg));
+ // Called when an operation (performed by "tree", described by "msg") may cause the GcHeap to be mutated.
+ // As GcHeap is a subset of ByrefExposed, this will also annotate the ByrefExposed mutation.
+ void fgMutateGcHeap(GenTreePtr tree DEBUGARG(const char* msg));
- // Tree caused an update in the current heap VN. If "tree" has an associated heap SSA #, record that
+ // Called when an operation (performed by "tree", described by "msg") may cause an address-exposed local to be
+ // mutated.
+ void fgMutateAddressExposedLocal(GenTreePtr tree DEBUGARG(const char* msg));
+
+ // For a GC heap store at curTree, record the new curMemoryVN's and update curTree's MemorySsaMap.
+ // As GcHeap is a subset of ByrefExposed, this will also record the ByrefExposed store.
+ void recordGcHeapStore(GenTreePtr curTree, ValueNum gcHeapVN DEBUGARG(const char* msg));
+
+ // For a store to an address-exposed local at curTree, record the new curMemoryVN and update curTree's MemorySsaMap.
+ void recordAddressExposedLocalStore(GenTreePtr curTree, ValueNum memoryVN DEBUGARG(const char* msg));
+
+ // Tree caused an update in the current memory VN. If "tree" has an associated heap SSA #, record that
// value in that SSA #.
- void fgValueNumberRecordHeapSsa(GenTreePtr tree);
+ void fgValueNumberRecordMemorySsa(MemoryKind memoryKind, GenTreePtr tree);
// The input 'tree' is a leaf node that is a constant
// Assign the proper value number to the tree
@@ -3837,11 +3912,11 @@ public:
// Requires "helpFunc" to be pure. Returns the corresponding VNFunc.
VNFunc fgValueNumberHelperMethVNFunc(CorInfoHelpFunc helpFunc);
- // This is the current value number for the "Heap" implicit variable while
- // doing value numbering. This is the value number under the "liberal" interpretation
- // of heap values; the "conservative" interpretation needs no VN, since every access of
- // the heap yields an unknown value.
- ValueNum fgCurHeapVN;
+ // These are the current value number for the memory implicit variables while
+ // doing value numbering. These are the value numbers under the "liberal" interpretation
+ // of memory values; the "conservative" interpretation needs no VN, since every access of
+ // memory yields an unknown value.
+ ValueNum fgCurMemoryVN[MemoryKindCount];
// Return a "pseudo"-class handle for an array element type. If "elemType" is TYP_STRUCT,
// requires "elemStructType" to be non-null (and to have a low-order zero). Otherwise, low order bit
@@ -4272,6 +4347,7 @@ public:
void fgDebugCheckNodeLinks(BasicBlock* block, GenTreePtr stmt);
void fgDebugCheckFlags(GenTreePtr tree);
void fgDebugCheckFlagsHelper(GenTreePtr tree, unsigned treeFlags, unsigned chkFlags);
+ void fgDebugCheckTryFinallyExits();
#endif
#ifdef LEGACY_BACKEND
@@ -4524,7 +4600,6 @@ private:
static MorphAddrContext s_CopyBlockMAC;
#ifdef FEATURE_SIMD
- GenTreePtr fgCopySIMDNode(GenTreeSIMD* simdNode);
GenTreePtr getSIMDStructFromField(GenTreePtr tree,
var_types* baseTypeOut,
unsigned* indexOut,
@@ -4613,11 +4688,13 @@ private:
VARSET_TP fgCurUseSet; // vars used by block (before an assignment)
VARSET_TP fgCurDefSet; // vars assigned by block (before a use)
- bool fgCurHeapUse; // True iff the current basic block uses the heap before defining it.
- bool fgCurHeapDef; // True iff the current basic block defines the heap.
- bool fgCurHeapHavoc; // True if the current basic block is known to set the heap to a "havoc" value.
+    MemoryKindSet fgCurMemoryUse;   // Set of memory kinds used by the current basic block (before a definition).
+    MemoryKindSet fgCurMemoryDef;   // Set of memory kinds defined by the current basic block.
+    MemoryKindSet fgCurMemoryHavoc; // Set of memory kinds the current basic block is known to set to a "havoc" value.
- void fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar = nullptr);
+ bool byrefStatesMatchGcHeapStates; // True iff GcHeap and ByrefExposed memory have all the same def points.
+
+ void fgMarkUseDef(GenTreeLclVarCommon* tree);
void fgBeginScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
void fgEndScopeLife(VARSET_TP* inScope, VarScopeDsc* var);
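A minimal sketch of how these per-block sets are meant to be updated during liveness. The memoryKindSet(...) helper and the fullMemoryKindSet constant are assumptions here; only the fields and the GcHeap/ByrefExposed kinds come from this diff:

// On a GC heap read before any definition: the block uses GcHeap and therefore ByrefExposed, its superset.
fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed); // memoryKindSet(...) is assumed to build a bit set

// On a call that may write anything: every memory kind is both defined and havoc'd.
fgCurMemoryDef |= fullMemoryKindSet;                   // fullMemoryKindSet is an assumed "all kinds" constant
fgCurMemoryHavoc |= fullMemoryKindSet;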
@@ -4686,6 +4763,9 @@ private:
#ifdef DEBUG
static fgWalkPreFn fgDebugCheckInlineCandidates;
+
+ void CheckNoFatPointerCandidatesLeft();
+ static fgWalkPreFn fgDebugCheckFatPointerCandidates;
#endif
void fgPromoteStructs();
@@ -4968,9 +5048,10 @@ public:
#define LPFLG_ASGVARS_INC 0x8000 // "lpAsgVars" is incomplete -- vars beyond those representable in an AllVarSet
// type are assigned to.
- bool lpLoopHasHeapHavoc; // The loop contains an operation that we assume has arbitrary heap side effects.
- // If this is set, the fields below may not be accurate (since they become irrelevant.)
- bool lpContainsCall; // True if executing the loop body *may* execute a call
+ bool lpLoopHasMemoryHavoc[MemoryKindCount]; // The loop contains an operation that we assume has arbitrary
+ // memory side effects. If this is set, the fields below
+ // may not be accurate (since they become irrelevant.)
+ bool lpContainsCall; // True if executing the loop body *may* execute a call
VARSET_TP lpVarInOut; // The set of variables that are IN or OUT during the execution of this loop
VARSET_TP lpVarUseDef; // The set of variables that are USE or DEF during the execution of this loop
@@ -5307,6 +5388,9 @@ protected:
treeStmtLstPtr csdTreeList; // list of matching tree nodes: head
treeStmtLstPtr csdTreeLast; // list of matching tree nodes: tail
+
+ ValueNum defConservativeVN; // if all def occurrences share the same conservative value
+ // number, this will reflect it; otherwise, NoVN.
};
static const size_t s_optCSEhashSize;
@@ -5462,11 +5546,27 @@ public:
}
};
-#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array
-#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type.
-#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores.
-#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference.
-#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check.
+#define OMF_HAS_NEWARRAY 0x00000001 // Method contains 'new' of an array
+#define OMF_HAS_NEWOBJ 0x00000002 // Method contains 'new' of an object type.
+#define OMF_HAS_ARRAYREF 0x00000004 // Method contains array element loads or stores.
+#define OMF_HAS_VTABLEREF 0x00000008 // Method contains method table reference.
+#define OMF_HAS_NULLCHECK 0x00000010 // Method contains null check.
+#define OMF_HAS_FATPOINTER 0x00000020 // Method contains call, that needs fat pointer transformation.
+
+ bool doesMethodHaveFatPointer()
+ {
+ return (optMethodFlags & OMF_HAS_FATPOINTER) != 0;
+ }
+
+ void setMethodHasFatPointer()
+ {
+ optMethodFlags |= OMF_HAS_FATPOINTER;
+ }
+
+ void clearMethodHasFatPointer()
+ {
+ optMethodFlags &= ~OMF_HAS_FATPOINTER;
+ }
unsigned optMethodFlags;
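How the new flag is intended to thread through the phases, pieced together from this diff; the importer-side trigger is paraphrased rather than quoted:

// Importer side (paraphrased): when a calli target may be a fat function pointer under CoreRT,
// remember that the method needs the transformation.
setMethodHasFatPointer();

// Driver side (this condition appears verbatim in compCompile earlier in this diff): run the
// transformation only when targeting the CoreRT ABI and the flag was set during import.
if (IsTargetAbi(CORINFO_CORERT_ABI) && doesMethodHaveFatPointer())
{
    fgTransformFatCalli();
}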
@@ -5931,10 +6031,6 @@ protected:
ssize_t optGetArrayRefScaleAndIndex(GenTreePtr mul, GenTreePtr* pIndex DEBUGARG(bool bRngChk));
GenTreePtr optFindLocalInit(BasicBlock* block, GenTreePtr local, VARSET_TP* pKilledInOut, bool* isKilledAfterInit);
-#if FANCY_ARRAY_OPT
- bool optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1 = 0, int add2 = 0);
-#endif
-
bool optReachWithoutCall(BasicBlock* srcBB, BasicBlock* dstBB);
protected:
@@ -6845,10 +6941,15 @@ private:
void unwindReserveFunc(FuncInfoDsc* func);
void unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode);
-#if defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) || (defined(_TARGET_X86_) && FEATURE_EH_FUNCLETS)
void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);
+
+#endif // _TARGET_AMD64_ || (_TARGET_X86_ && FEATURE_EH_FUNCLETS)
+
+#if defined(_TARGET_AMD64_)
+
UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
void unwindBegPrologWindows();
@@ -6932,6 +7033,20 @@ private:
// Should we support SIMD intrinsics?
bool featureSIMD;
+ // Have we identified any SIMD types?
+    // This is currently used by struct promotion to avoid querying the type information for a struct
+    // field (to see if it is a SIMD type) when we haven't seen any SIMD types or operations in
+    // the method.
+ bool _usesSIMDTypes;
+ bool usesSIMDTypes()
+ {
+ return _usesSIMDTypes;
+ }
+ void setUsesSIMDTypes(bool value)
+ {
+ _usesSIMDTypes = value;
+ }
+
// This is a temp lclVar allocated on the stack as TYP_SIMD. It is used to implement intrinsics
// that require indexed access to the individual fields of the vector, which is not well supported
// by the hardware. It is allocated when/if such situations are encountered during Lowering.
@@ -7121,6 +7236,9 @@ private:
GenTree** op1,
GenTree** op2);
+ // Creates a GT_SIMD tree for Abs intrinsic.
+ GenTreePtr impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned simdVectorSize, GenTree* op1);
+
#if defined(_TARGET_XARCH_) && !defined(LEGACY_BACKEND)
// Transforms operands and returns the SIMD intrinsic to be applied on
// transformed operands to obtain == comparison result.
@@ -7623,8 +7741,6 @@ public:
static const bool compNoPInvokeInlineCB;
#endif
- bool compMustInlinePInvokeCalli; // Unmanaged CALLI in IL stubs must be inlined
-
#ifdef DEBUG
bool compGcChecks; // Check arguments and return values to ensure they are sane
bool compStackCheckOnRet; // Check ESP on return to ensure it is correct
@@ -7783,11 +7899,22 @@ public:
/* These should not be exhaustively used as they might */ \
/* hide/trivialize other areas */ \
\
- STRESS_MODE(REGS) STRESS_MODE(DBL_ALN) STRESS_MODE(LCL_FLDS) STRESS_MODE(UNROLL_LOOPS) \
- STRESS_MODE(MAKE_CSE) STRESS_MODE(LEGACY_INLINE) STRESS_MODE(CLONE_EXPR) \
- STRESS_MODE(USE_FCOMI) STRESS_MODE(USE_CMOV) STRESS_MODE(FOLD) \
- STRESS_MODE(BB_PROFILE) STRESS_MODE(OPT_BOOLS_GC) STRESS_MODE(REMORPH_TREES) \
- STRESS_MODE(64RSLT_MUL) STRESS_MODE(DO_WHILE_LOOPS) STRESS_MODE(MIN_OPTS) \
+ STRESS_MODE(REGS) \
+ STRESS_MODE(DBL_ALN) \
+ STRESS_MODE(LCL_FLDS) \
+ STRESS_MODE(UNROLL_LOOPS) \
+ STRESS_MODE(MAKE_CSE) \
+ STRESS_MODE(LEGACY_INLINE) \
+ STRESS_MODE(CLONE_EXPR) \
+ STRESS_MODE(USE_FCOMI) \
+ STRESS_MODE(USE_CMOV) \
+ STRESS_MODE(FOLD) \
+ STRESS_MODE(BB_PROFILE) \
+ STRESS_MODE(OPT_BOOLS_GC) \
+ STRESS_MODE(REMORPH_TREES) \
+ STRESS_MODE(64RSLT_MUL) \
+ STRESS_MODE(DO_WHILE_LOOPS) \
+ STRESS_MODE(MIN_OPTS) \
STRESS_MODE(REVERSE_FLAG) /* Will set GTF_REVERSE_OPS whenever we can */ \
STRESS_MODE(REVERSE_COMMA) /* Will reverse commas created with gtNewCommaNode */ \
STRESS_MODE(TAILCALL) /* Will make the call as a tailcall whenever legal */ \
@@ -7796,17 +7923,23 @@ public:
STRESS_MODE(NULL_OBJECT_CHECK) \
STRESS_MODE(PINVOKE_RESTORE_ESP) \
STRESS_MODE(RANDOM_INLINE) \
+ STRESS_MODE(SWITCH_CMP_BR_EXPANSION) \
+ STRESS_MODE(GENERIC_VARN) \
+ \
+ /* After COUNT_VARN, stress level 2 does all of these all the time */ \
\
- STRESS_MODE(GENERIC_VARN) STRESS_MODE(COUNT_VARN) \
+ STRESS_MODE(COUNT_VARN) \
\
/* "Check" stress areas that can be exhaustively used if we */ \
/* dont care about performance at all */ \
\
STRESS_MODE(FORCE_INLINE) /* Treat every method as AggressiveInlining */ \
STRESS_MODE(CHK_FLOW_UPDATE) \
- STRESS_MODE(EMITTER) STRESS_MODE(CHK_REIMPORT) STRESS_MODE(FLATFP) \
- \
- STRESS_MODE(GENERIC_CHECK) STRESS_MODE(COUNT) \
+ STRESS_MODE(EMITTER) \
+ STRESS_MODE(CHK_REIMPORT) \
+ STRESS_MODE(FLATFP) \
+ STRESS_MODE(GENERIC_CHECK) \
+ STRESS_MODE(COUNT)
enum compStressArea
{
@@ -8951,21 +9084,28 @@ public:
return compRoot->m_arrayInfoMap;
}
- NodeToUnsignedMap* m_heapSsaMap;
+ NodeToUnsignedMap* m_memorySsaMap[MemoryKindCount];
- // In some cases, we want to assign intermediate SSA #'s to heap states, and know what nodes create those heap
- // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the heap state,
- // all the possible heap states are possible initial states of the corresponding catch block(s).)
- NodeToUnsignedMap* GetHeapSsaMap()
+ // In some cases, we want to assign intermediate SSA #'s to memory states, and know what nodes create those memory
+ // states. (We do this for try blocks, where, if the try block doesn't do a call that loses track of the memory
+ // state, all the possible memory states are possible initial states of the corresponding catch block(s).)
+ NodeToUnsignedMap* GetMemorySsaMap(MemoryKind memoryKind)
{
+ if (memoryKind == GcHeap && byrefStatesMatchGcHeapStates)
+ {
+ // Use the same map for GCHeap and ByrefExposed when their states match.
+ memoryKind = ByrefExposed;
+ }
+
+ assert(memoryKind < MemoryKindCount);
Compiler* compRoot = impInlineRoot();
- if (compRoot->m_heapSsaMap == nullptr)
+ if (compRoot->m_memorySsaMap[memoryKind] == nullptr)
{
// Create a CompAllocator that labels sub-structure with CMK_ArrayInfoMap, and use that for allocation.
- IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
- compRoot->m_heapSsaMap = new (ialloc) NodeToUnsignedMap(ialloc);
+ IAllocator* ialloc = new (this, CMK_ArrayInfoMap) CompAllocator(this, CMK_ArrayInfoMap);
+ compRoot->m_memorySsaMap[memoryKind] = new (ialloc) NodeToUnsignedMap(ialloc);
}
- return compRoot->m_heapSsaMap;
+ return compRoot->m_memorySsaMap[memoryKind];
}
// The Refany type is the only struct type whose structure is implicitly assumed by IL. We need its fields.
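A hedged usage sketch for the per-kind SSA map accessor above. The Set/Lookup calls follow the usual JIT hash-table pattern; treat the exact signatures as assumptions:

// Record that 'storeNode' produced memory SSA number 'ssaNum' for the GC heap, then read it back.
// When ByrefExposed and GcHeap states match, both kinds transparently share one map.
GetMemorySsaMap(GcHeap)->Set(storeNode, ssaNum);

unsigned recordedSsaNum;
if (GetMemorySsaMap(GcHeap)->Lookup(storeNode, &recordedSsaNum))
{
    assert(recordedSsaNum == ssaNum);
}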
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index e8358fd2ab..6baf601892 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -500,6 +500,52 @@ inline regNumber genRegNumFromMask(regMaskTP mask)
return regNum;
}
+//------------------------------------------------------------------------------
+// genTypeCanRepresentValue: Checks if a value can be represented by a given type.
+//
+// Arguments:
+// value - the value to check
+// type - the type
+//
+// Return Value:
+// True if the value is representable, false otherwise.
+//
+// Notes:
+//    If the type is not integral or ref-like (ref/byref/array) then false is
+//    always returned.
+
+template <typename TValue>
+inline bool genTypeCanRepresentValue(var_types type, TValue value)
+{
+ switch (type)
+ {
+ case TYP_UBYTE:
+ case TYP_BOOL:
+ return FitsIn<UINT8>(value);
+ case TYP_BYTE:
+ return FitsIn<INT8>(value);
+ case TYP_USHORT:
+ case TYP_CHAR:
+ return FitsIn<UINT16>(value);
+ case TYP_SHORT:
+ return FitsIn<INT16>(value);
+ case TYP_UINT:
+ return FitsIn<UINT32>(value);
+ case TYP_INT:
+ return FitsIn<INT32>(value);
+ case TYP_ULONG:
+ return FitsIn<UINT64>(value);
+ case TYP_LONG:
+ return FitsIn<INT64>(value);
+ case TYP_REF:
+ case TYP_BYREF:
+ case TYP_ARRAY:
+ return FitsIn<UINT_PTR>(value);
+ default:
+ return false;
+ }
+}
+
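A few concrete results for the helper above, worked from its FitsIn cases (illustrative asserts only):

assert(genTypeCanRepresentValue(TYP_UBYTE, 255));   // fits in UINT8
assert(!genTypeCanRepresentValue(TYP_UBYTE, 256));  // 256 does not fit in UINT8
assert(genTypeCanRepresentValue(TYP_SHORT, -5));    // fits in INT16
assert(!genTypeCanRepresentValue(TYP_USHORT, -1));  // negative values never fit an unsigned type
assert(!genTypeCanRepresentValue(TYP_DOUBLE, 0));   // non-integral, non-ref types always return false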
/*****************************************************************************
*
* Return the size in bytes of the given type.
@@ -1137,7 +1183,6 @@ inline GenTreePtr Compiler::gtNewFieldRef(
tree->gtField.gtFldObj = obj;
tree->gtField.gtFldHnd = fldHnd;
tree->gtField.gtFldOffset = offset;
- tree->gtFlags |= GTF_GLOB_REF;
#ifdef FEATURE_READYTORUN_COMPILER
tree->gtField.gtFieldLookup.addr = nullptr;
@@ -1154,6 +1199,18 @@ inline GenTreePtr Compiler::gtNewFieldRef(
{
unsigned lclNum = obj->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
lvaTable[lclNum].lvFieldAccessed = 1;
+#if defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ // These structs are passed by reference; we should probably be able to treat these
+ // as non-global refs, but downstream logic expects these to be marked this way.
+ if (lvaTable[lclNum].lvIsParam)
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
+ }
+#endif // defined(_TARGET_AMD64_) || defined(_TARGET_ARM64_)
+ }
+ else
+ {
+ tree->gtFlags |= GTF_GLOB_REF;
}
return tree;
@@ -4626,15 +4683,14 @@ inline void BasicBlock::InitVarSets(Compiler* comp)
{
VarSetOps::AssignNoCopy(comp, bbVarUse, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbVarDef, VarSetOps::MakeEmpty(comp));
- VarSetOps::AssignNoCopy(comp, bbVarTmp, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbLiveIn, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbLiveOut, VarSetOps::MakeEmpty(comp));
VarSetOps::AssignNoCopy(comp, bbScope, VarSetOps::MakeEmpty(comp));
- bbHeapUse = false;
- bbHeapDef = false;
- bbHeapLiveIn = false;
- bbHeapLiveOut = false;
+ bbMemoryUse = emptyMemoryKindSet;
+ bbMemoryDef = emptyMemoryKindSet;
+ bbMemoryLiveIn = emptyMemoryKindSet;
+ bbMemoryLiveOut = emptyMemoryKindSet;
}
// Returns true if the basic block ends with GT_JMP
diff --git a/src/jit/compmemkind.h b/src/jit/compmemkind.h
index e27d2071f7..b22bf6de1e 100644
--- a/src/jit/compmemkind.h
+++ b/src/jit/compmemkind.h
@@ -39,7 +39,7 @@ CompMemKindMacro(IndirAssignMap)
CompMemKindMacro(FieldSeqStore)
CompMemKindMacro(ZeroOffsetFieldMap)
CompMemKindMacro(ArrayInfoMap)
-CompMemKindMacro(HeapPhiArg)
+CompMemKindMacro(MemoryPhiArg)
CompMemKindMacro(CSE)
CompMemKindMacro(GC)
CompMemKindMacro(CorSig)
diff --git a/src/jit/compphases.h b/src/jit/compphases.h
index ac1bb636ff..5038d6e9c9 100644
--- a/src/jit/compphases.h
+++ b/src/jit/compphases.h
@@ -11,9 +11,10 @@
// corresponding array of string names of those phases. This include file undefines CompPhaseNameMacro
// after the last use.
// The arguments are:
-// CompPhaseNameMacro(enumName, stringName, hasChildren, parent)
+// CompPhaseNameMacro(enumName, stringName, shortName, hasChildren, parent)
// "enumName" is an Enumeration-style all-caps name.
// "stringName" is a self-explanatory.
+// "shortName" is an abbreviated form for stringName
// "hasChildren" is true if this phase is broken out into subphases.
// (We should never do EndPhase on a phase that has children, only on 'leaf phases.')
// "parent" is -1 for leaf phases, otherwise it is the "enumName" of the parent phase.
@@ -25,6 +26,9 @@ CompPhaseNameMacro(PHASE_POST_IMPORT, "Post-import",
CompPhaseNameMacro(PHASE_MORPH_INIT, "Morph - Init", "MOR-INIT" ,false, -1)
CompPhaseNameMacro(PHASE_MORPH_INLINE, "Morph - Inlining", "MOR-INL", false, -1)
CompPhaseNameMacro(PHASE_MORPH_IMPBYREF, "Morph - ByRefs", "MOR-BYREF",false, -1)
+CompPhaseNameMacro(PHASE_EMPTY_TRY, "Remove empty try", "EMPTYTRY", false, -1)
+CompPhaseNameMacro(PHASE_EMPTY_FINALLY, "Remove empty finally", "EMPTYFIN", false, -1)
+CompPhaseNameMacro(PHASE_CLONE_FINALLY, "Clone finally", "CLONEFIN", false, -1)
CompPhaseNameMacro(PHASE_STR_ADRLCL, "Morph - Structs/AddrExp", "MOR-STRAL",false, -1)
CompPhaseNameMacro(PHASE_MORPH_GLOBAL, "Morph - Global", "MOR-GLOB", false, -1)
CompPhaseNameMacro(PHASE_MORPH_END, "Morph - Finish", "MOR-END", false, -1)
diff --git a/src/jit/decomposelongs.cpp b/src/jit/decomposelongs.cpp
index 98b8b081fc..407ae1c35b 100644
--- a/src/jit/decomposelongs.cpp
+++ b/src/jit/decomposelongs.cpp
@@ -249,6 +249,12 @@ GenTree* DecomposeLongs::DecomposeNode(GenTree* tree)
nextNode = DecomposeRotate(use);
break;
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ nextNode = DecomposeSimd(use);
+ break;
+#endif // FEATURE_SIMD
+
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
@@ -411,6 +417,8 @@ GenTree* DecomposeLongs::DecomposeLclFld(LIR::Use& use)
GenTree* hiResult = m_compiler->gtNewLclFldNode(loResult->gtLclNum, TYP_INT, loResult->gtLclOffs + 4);
Range().InsertAfter(loResult, hiResult);
+ m_compiler->lvaIncRefCnts(hiResult);
+
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
@@ -1560,6 +1568,163 @@ GenTree* DecomposeLongs::DecomposeUMod(LIR::Use& use)
return FinalizeDecomposition(use, loResult, hiResult, hiResult);
}
+#ifdef FEATURE_SIMD
+
+//------------------------------------------------------------------------
+// DecomposeSimd: Decompose GT_SIMD.
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeSimd(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert(oper == GT_SIMD);
+
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicGetItem:
+ return DecomposeSimdGetItem(use);
+
+ default:
+ noway_assert(!"unexpected GT_SIMD node in long decomposition");
+ break;
+ }
+
+ return nullptr;
+}
+
+//------------------------------------------------------------------------
+// DecomposeSimdGetItem: Decompose GT_SIMD -- SIMDIntrinsicGetItem.
+//
+// Decompose a get[i] node on Vector<long>. For:
+//
+// GT_SIMD{get_item}[long](simd_var, index)
+//
+// create:
+//
+// tmp_simd_var = simd_var
+// tmp_index = index
+// loResult = GT_SIMD{get_item}[int](tmp_simd_var, tmp_index * 2)
+// hiResult = GT_SIMD{get_item}[int](tmp_simd_var, tmp_index * 2 + 1)
+// return: GT_LONG(loResult, hiResult)
+//
+// This isn't optimal codegen, since SIMDIntrinsicGetItem sometimes requires
+// temps that could be shared, for example.
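+//
+// As a concrete illustration (lane numbers are hypothetical): for a
+// constant index i == 3 on a Vector<long>, loResult reads int lane 6 and
+// hiResult reads int lane 7 of the same SIMD temp.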
+//
+// Arguments:
+// use - the LIR::Use object for the def that needs to be decomposed.
+//
+// Return Value:
+// The next node to process.
+//
+GenTree* DecomposeLongs::DecomposeSimdGetItem(LIR::Use& use)
+{
+ GenTree* tree = use.Def();
+ genTreeOps oper = tree->OperGet();
+
+ assert(oper == GT_SIMD);
+
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ var_types baseType = simdTree->gtSIMDBaseType;
+ unsigned simdSize = simdTree->gtSIMDSize;
+
+ assert(simdTree->gtSIMDIntrinsicID == SIMDIntrinsicGetItem);
+ assert(varTypeIsLong(baseType));
+ assert(varTypeIsLong(simdTree));
+ assert(varTypeIsSIMD(simdTree->gtOp.gtOp1->gtType));
+ assert(simdTree->gtOp.gtOp2->gtType == TYP_INT);
+
+ bool indexIsConst = simdTree->gtOp.gtOp2->IsCnsIntOrI();
+ ssize_t index = 0;
+ if (indexIsConst)
+ {
+ index = simdTree->gtOp.gtOp2->gtIntCon.gtIconVal;
+ }
+
+ LIR::Use op1(Range(), &simdTree->gtOp.gtOp1, simdTree);
+ unsigned simdTmpVarNum = op1.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeSimdGetItem]: Saving op1 tree to a temp var:\n");
+ DISPTREERANGE(Range(), op1.Def());
+
+ unsigned indexTmpVarNum = 0;
+ if (!indexIsConst)
+ {
+ LIR::Use op2(Range(), &simdTree->gtOp.gtOp2, simdTree);
+ indexTmpVarNum = op2.ReplaceWithLclVar(m_compiler, m_blockWeight);
+ JITDUMP("[DecomposeSimdGetItem]: Saving op2 tree to a temp var:\n");
+ DISPTREERANGE(Range(), op2.Def());
+ }
+
+ // Create:
+ // loResult = GT_SIMD{get_item}[int](tmp_simd_var, index * 2)
+
+ GenTree* simdTmpVar1 = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTree->gtOp.gtOp1->gtType);
+ GenTree* indexTimesTwo1;
+
+ if (indexIsConst)
+ {
+ // Reuse the existing index constant node.
+ indexTimesTwo1 = simdTree->gtOp.gtOp2;
+ Range().Remove(indexTimesTwo1);
+ indexTimesTwo1->gtIntCon.gtIconVal = index * 2;
+
+ Range().InsertBefore(simdTree, simdTmpVar1, indexTimesTwo1);
+ }
+ else
+ {
+ GenTree* indexTmpVar1 = m_compiler->gtNewLclLNode(indexTmpVarNum, TYP_INT);
+ GenTree* two1 = m_compiler->gtNewIconNode(2, TYP_INT);
+ indexTimesTwo1 = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, indexTmpVar1, two1);
+ Range().InsertBefore(simdTree, simdTmpVar1, indexTmpVar1, two1, indexTimesTwo1);
+ }
+
+ GenTree* loResult =
+ m_compiler->gtNewSIMDNode(TYP_INT, simdTmpVar1, indexTimesTwo1, SIMDIntrinsicGetItem, TYP_INT, simdSize);
+ Range().InsertBefore(simdTree, loResult);
+
+ // Create:
+ // hiResult = GT_SIMD{get_item}[int](tmp_simd_var, index * 2 + 1)
+
+ GenTree* simdTmpVar2 = m_compiler->gtNewLclLNode(simdTmpVarNum, simdTree->gtOp.gtOp1->gtType);
+ GenTree* indexTimesTwoPlusOne;
+
+ if (indexIsConst)
+ {
+ indexTimesTwoPlusOne = m_compiler->gtNewIconNode(index * 2 + 1, TYP_INT);
+ Range().InsertBefore(simdTree, simdTmpVar2, indexTimesTwoPlusOne);
+ }
+ else
+ {
+ GenTree* indexTmpVar2 = m_compiler->gtNewLclLNode(indexTmpVarNum, TYP_INT);
+ GenTree* two2 = m_compiler->gtNewIconNode(2, TYP_INT);
+ GenTree* indexTimesTwo2 = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, indexTmpVar2, two2);
+ GenTree* one = m_compiler->gtNewIconNode(1, TYP_INT);
+ indexTimesTwoPlusOne = m_compiler->gtNewOperNode(GT_ADD, TYP_INT, indexTimesTwo2, one);
+ Range().InsertBefore(simdTree, simdTmpVar2, indexTmpVar2, two2, indexTimesTwo2);
+ Range().InsertBefore(simdTree, one, indexTimesTwoPlusOne);
+ }
+
+ GenTree* hiResult =
+ m_compiler->gtNewSIMDNode(TYP_INT, simdTmpVar2, indexTimesTwoPlusOne, SIMDIntrinsicGetItem, TYP_INT, simdSize);
+ Range().InsertBefore(simdTree, hiResult);
+
+ // Done with the original tree; remove it.
+
+ Range().Remove(simdTree);
+
+ return FinalizeDecomposition(use, loResult, hiResult, hiResult);
+}
+
+#endif // FEATURE_SIMD
+
//------------------------------------------------------------------------
// StoreNodeToVar: Check if the user is a STORE_LCL_VAR, and if it isn't,
// store the node to a var. Then decompose the new LclVar.
diff --git a/src/jit/decomposelongs.h b/src/jit/decomposelongs.h
index 8965a0b330..ff4f4ac880 100644
--- a/src/jit/decomposelongs.h
+++ b/src/jit/decomposelongs.h
@@ -55,6 +55,8 @@ private:
GenTree* DecomposeRotate(LIR::Use& use);
GenTree* DecomposeMul(LIR::Use& use);
GenTree* DecomposeUMod(LIR::Use& use);
+ GenTree* DecomposeSimd(LIR::Use& use);
+ GenTree* DecomposeSimdGetItem(LIR::Use& use);
// Helper functions
GenTree* FinalizeDecomposition(LIR::Use& use, GenTree* loResult, GenTree* hiResult, GenTree* insertResultAfter);
diff --git a/src/jit/ee_il_dll.cpp b/src/jit/ee_il_dll.cpp
index dcadaa9453..d5705ab353 100644
--- a/src/jit/ee_il_dll.cpp
+++ b/src/jit/ee_il_dll.cpp
@@ -409,13 +409,16 @@ unsigned CILJit::getMaxIntrinsicSIMDVectorLength(DWORD cpuCompileFlags)
{
if (JitConfig.EnableAVX() != 0)
{
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 32\n");
return 32;
}
}
#endif // FEATURE_AVX_SUPPORT
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 16\n");
return 16;
#endif // _TARGET_XARCH_
#else // !FEATURE_SIMD
+ JITDUMP("getMaxIntrinsicSIMDVectorLength: returning 0\n");
return 0;
#endif // !FEATURE_SIMD
}
diff --git a/src/jit/emit.cpp b/src/jit/emit.cpp
index 0929b7392e..1e566b2e76 100644
--- a/src/jit/emit.cpp
+++ b/src/jit/emit.cpp
@@ -1643,12 +1643,10 @@ void emitter::emitCreatePlaceholderIG(insGroupPlaceholderType igType,
{
igPh->igFlags |= IGF_FUNCLET_PROLOG;
}
-#ifdef DEBUG
else if (igType == IGPT_FUNCLET_EPILOG)
{
igPh->igFlags |= IGF_FUNCLET_EPILOG;
}
-#endif // DEBUG
#endif // FEATURE_EH_FUNCLETS
/* Link it into the placeholder list */
diff --git a/src/jit/emit.h b/src/jit/emit.h
index 5b1a395379..f57cc0a0f7 100644
--- a/src/jit/emit.h
+++ b/src/jit/emit.h
@@ -270,16 +270,14 @@ struct insGroup
#define IGF_FINALLY_TARGET 0x0004 // this group is the start of a basic block that is returned to after a finally.
#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
#define IGF_FUNCLET_PROLOG 0x0008 // this group belongs to a funclet prolog
-#ifdef DEBUG
-#define IGF_FUNCLET_EPILOG 0x0010 // this group belongs to a funclet epilog. Currently, this is only needed for DEBUG.
-#endif
-#define IGF_EPILOG 0x0020 // this group belongs to a main function epilog
-#define IGF_NOGCINTERRUPT 0x0040 // this IG is is a no-interrupt region (prolog, epilog, etc.)
-#define IGF_UPD_ISZ 0x0080 // some instruction sizes updated
-#define IGF_PLACEHOLDER 0x0100 // this is a placeholder group, to be filled in later
-#define IGF_EMIT_ADD 0x0200 // this is a block added by the emitter
- // because the codegen block was too big. Also used for
- // placeholder IGs that aren't also labels.
+#define IGF_FUNCLET_EPILOG 0x0010 // this group belongs to a funclet epilog.
+#define IGF_EPILOG 0x0020 // this group belongs to a main function epilog
+#define IGF_NOGCINTERRUPT 0x0040 // this IG is a no-interrupt region (prolog, epilog, etc.)
+#define IGF_UPD_ISZ 0x0080 // some instruction sizes updated
+#define IGF_PLACEHOLDER 0x0100 // this is a placeholder group, to be filled in later
+#define IGF_EMIT_ADD 0x0200 // this is a block added by the emitter
+ // because the codegen block was too big. Also used for
+ // placeholder IGs that aren't also labels.
// Mask of IGF_* flags that should be propagated to new blocks when they are created.
// This allows prologs and epilogs to be any number of IGs, but still be
@@ -491,12 +489,11 @@ protected:
return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_PROLOG) != 0);
}
-#ifdef DEBUG
bool emitIGisInFuncletEpilog(const insGroup* ig)
{
return (ig != nullptr) && ((ig->igFlags & IGF_FUNCLET_EPILOG) != 0);
}
-#endif // DEBUG
+
#endif // FEATURE_EH_FUNCLETS
// If "ig" corresponds to the start of a basic block that is the
diff --git a/src/jit/emitarm.cpp b/src/jit/emitarm.cpp
index 45928ca2d2..1b3ef1bdc7 100644
--- a/src/jit/emitarm.cpp
+++ b/src/jit/emitarm.cpp
@@ -7536,31 +7536,53 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
switch (node->OperGet())
{
case GT_IND:
- {
- GenTree* addr = node->gtGetOp1();
- assert(!addr->isContained());
- codeGen->genConsumeReg(addr);
- emitIns_R_R(ins, attr, node->gtRegNum, addr->gtRegNum);
- }
- break;
-
case GT_STOREIND:
{
- GenTree* addr = node->gtGetOp1();
- GenTree* data = node->gtOp.gtOp2;
+ GenTreeIndir* indir = node->AsIndir();
+ GenTree* addr = indir->Addr();
+ GenTree* data = indir->gtOp.gtOp2;
- assert(!addr->isContained());
- assert(!data->isContained());
- codeGen->genConsumeReg(addr);
- codeGen->genConsumeReg(data);
+ regNumber reg = (node->OperGet() == GT_IND) ? node->gtRegNum : data->gtRegNum;
- if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ if (addr->isContained())
{
- emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ assert(addr->OperGet() == GT_LCL_VAR_ADDR || addr->OperGet() == GT_LEA);
+
+ int offset = 0;
+ DWORD lsl = 0;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ offset = (int)addr->AsAddrMode()->gtOffset;
+ if (addr->AsAddrMode()->gtScale > 0)
+ {
+ assert(isPow2(addr->AsAddrMode()->gtScale));
+ BitScanForward(&lsl, addr->AsAddrMode()->gtScale);
+ }
+ }
+
+ GenTree* memBase = indir->Base();
+
+ if (indir->HasIndex())
+ {
+ NYI_ARM("emitInsMov HasIndex");
+ }
+ else
+ {
+ // TODO check offset is valid for encoding
+ emitIns_R_R_I(ins, attr, reg, memBase->gtRegNum, offset);
+ }
}
else
{
- emitIns_R_R(ins, attr, addr->gtRegNum, data->gtRegNum);
+ if (addr->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ emitIns_C_R(ins, attr, addr->gtClsVar.gtClsVarHnd, data->gtRegNum, 0);
+ }
+ else
+ {
+ emitIns_R_R(ins, attr, reg, addr->gtRegNum);
+ }
}
}
break;
@@ -7581,7 +7603,6 @@ void emitter::emitInsMov(instruction ins, emitAttr attr, GenTree* node)
else
{
assert(!data->isContained());
- codeGen->genConsumeReg(data);
emitIns_S_R(ins, attr, data->gtRegNum, varNode->GetLclNum(), 0);
codeGen->genUpdateLife(varNode);
}
diff --git a/src/jit/emitarm64.cpp b/src/jit/emitarm64.cpp
index 12c4087115..dd4bac808a 100644
--- a/src/jit/emitarm64.cpp
+++ b/src/jit/emitarm64.cpp
@@ -10892,7 +10892,6 @@ void emitter::emitInsLoadStoreOp(instruction ins, emitAttr attr, regNumber dataR
}
else // addr is not contained, so we evaluate it into a register
{
- codeGen->genConsumeReg(addr);
// Then load/store dataReg from/to [addrReg]
emitIns_R_R(ins, ldstAttr, dataReg, addr->gtRegNum);
}
diff --git a/src/jit/emitxarch.cpp b/src/jit/emitxarch.cpp
index b6bacfa520..be5cefbfea 100644
--- a/src/jit/emitxarch.cpp
+++ b/src/jit/emitxarch.cpp
@@ -57,10 +57,6 @@ bool emitter::IsAVXInstruction(instruction ins)
#endif
}
-#ifdef _TARGET_AMD64_
-#define REX_PREFIX_MASK 0xFF00000000LL
-#endif // _TARGET_AMD64_
-
#ifdef FEATURE_AVX_SUPPORT
// Returns true if the AVX instruction is a binary operator that requires 3 operands.
// When we emit an instruction with only two operands, we will duplicate the destination
@@ -717,12 +713,10 @@ unsigned emitter::emitGetPrefixSize(code_t code)
return 3;
}
-#ifdef _TARGET_AMD64_
- if (code & REX_PREFIX_MASK)
+ if (hasRexPrefix(code))
{
return 1;
}
-#endif // _TARGET_AMD64_
return 0;
}
@@ -898,7 +892,8 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
// The following SSE2 instructions write to a general purpose integer register.
if (!IsSSEOrAVXInstruction(ins) || ins == INS_mov_xmm2i || ins == INS_cvttsd2si
#ifndef LEGACY_BACKEND
- || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si
+ || ins == INS_cvttss2si || ins == INS_cvtsd2si || ins == INS_cvtss2si || ins == INS_pmovmskb ||
+ ins == INS_pextrw
#endif // !LEGACY_BACKEND
)
{
@@ -1881,10 +1876,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
}
}
-#ifdef _TARGET_AMD64_
size += emitGetVexPrefixAdjustedSize(ins, attrSize, code);
- if (code & REX_PREFIX_MASK)
+ if (hasRexPrefix(code))
{
// REX prefix
size += emitGetRexPrefixSize(ins);
@@ -1899,7 +1893,6 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code)
// Should have a REX byte
size += emitGetRexPrefixSize(ins);
}
-#endif // _TARGET_AMD64_
if (rgx == REG_NA)
{
@@ -2302,9 +2295,7 @@ void emitter::emitIns(instruction ins)
}
#endif // DEBUG
-#ifdef _TARGET_AMD64_
- assert((code & REX_PREFIX_MASK) == 0); // Can't have a REX bit with no operands, right?
-#endif // _TARGET_AMD64_
+ assert(!hasRexPrefix(code)); // Can't have a REX bit with no operands, right?
if (code & 0xFF000000)
{
@@ -2786,20 +2777,19 @@ CORINFO_FIELD_HANDLE emitter::emitFltOrDblConst(GenTreeDblCon* tree, emitAttr at
regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, GenTree* src)
{
// dst can only be a reg or modrm
- assert(!dst->isContained() || dst->isContainedMemoryOp() ||
- instrIs3opImul(ins)); // dst on these isn't really the dst
+ assert(!dst->isContained() || dst->isUsedFromMemory() || instrIs3opImul(ins)); // dst on these isn't really the dst
#ifdef DEBUG
// src can be anything but both src and dst cannot be addr modes
// or at least cannot be contained addr modes
- if (dst->isContainedMemoryOp())
+ if (dst->isUsedFromMemory())
{
- assert(!src->isContainedMemoryOp());
+ assert(!src->isUsedFromMemory());
}
- if (src->isContainedMemoryOp())
+ if (src->isUsedFromMemory())
{
- assert(!dst->isContainedMemoryOp());
+ assert(!dst->isUsedFromMemory());
}
#endif
@@ -2837,7 +2827,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// find local field if any
GenTreeLclFld* lclField = nullptr;
- if (src->isContainedLclField())
+ if (src->isLclFldUsedFromMemory())
{
lclField = src->AsLclFld();
}
@@ -2848,12 +2838,12 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// find contained lcl var if any
GenTreeLclVar* lclVar = nullptr;
- if (src->isContainedLclVar())
+ if (src->isLclVarUsedFromMemory())
{
assert(src->IsRegOptional());
lclVar = src->AsLclVar();
}
- else if (dst->isContainedLclVar())
+ if (dst->isLclVarUsedFromMemory())
{
assert(dst->IsRegOptional());
lclVar = dst->AsLclVar();
@@ -2861,12 +2851,12 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
// find contained spill tmp if any
TempDsc* tmpDsc = nullptr;
- if (src->isContainedSpillTemp())
+ if (src->isUsedFromSpillTemp())
{
assert(src->IsRegOptional());
tmpDsc = codeGen->getSpillTempDsc(src);
}
- else if (dst->isContainedSpillTemp())
+ else if (dst->isUsedFromSpillTemp())
{
assert(dst->IsRegOptional());
tmpDsc = codeGen->getSpillTempDsc(dst);
@@ -2952,7 +2942,7 @@ regNumber emitter::emitInsBinary(instruction ins, emitAttr attr, GenTree* dst, G
if (varNum != BAD_VAR_NUM || tmpDsc != nullptr)
{
// Is the memory op in the source position?
- if (src->isContainedMemoryOp())
+ if (src->isUsedFromMemory())
{
if (instrHasImplicitRegPairDest(ins))
{
@@ -3997,16 +3987,14 @@ void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f
code_t code = insCodeMI(ins);
UNATIVE_OFFSET sz = emitInsSizeCV(id, code, val);
-#ifdef _TARGET_AMD64_
// Vex prefix
sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
// REX prefix, if not already included in "code"
- if (TakesRexWPrefix(ins, attr) && (code & REX_PREFIX_MASK) == 0)
+ if (TakesRexWPrefix(ins, attr) && !hasRexPrefix(code))
{
sz += emitGetRexPrefixSize(ins);
}
-#endif // _TARGET_AMD64_
id->idAddr()->iiaFieldHnd = fldHnd;
id->idCodeSize(sz);
@@ -8055,10 +8043,7 @@ DONE:
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -8450,10 +8435,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -8883,10 +8865,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -9428,10 +9407,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
}
else
{
- if (emitInsCanOnlyWriteSSE2OrAVXReg(id))
- {
- }
- else
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
{
switch (id->idInsFmt())
{
@@ -10832,6 +10808,12 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
dst += emitOutputByte(dst, emitGetInsSC(id));
sz = emitSizeOfInsDsc(id);
+
+ // Kill any GC ref in the destination register if necessary.
+ if (!emitInsCanOnlyWriteSSE2OrAVXReg(id))
+ {
+ emitGCregDeadUpd(id->idReg1(), dst);
+ }
break;
/********************************************************************/
@@ -11202,9 +11184,14 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
assert(sz == emitSizeOfInsDsc(id));
#if !FEATURE_FIXED_OUT_ARGS
+ bool updateStackLevel = !emitIGisInProlog(ig) && !emitIGisInEpilog(ig);
+
+#if FEATURE_EH_FUNCLETS
+ updateStackLevel = updateStackLevel && !emitIGisInFuncletProlog(ig) && !emitIGisInFuncletEpilog(ig);
+#endif // FEATURE_EH_FUNCLETS
// Make sure we keep the current stack level up to date
- if (!emitIGisInProlog(ig) && !emitIGisInEpilog(ig))
+ if (updateStackLevel)
{
switch (ins)
{
diff --git a/src/jit/emitxarch.h b/src/jit/emitxarch.h
index 98256cdaa7..9c435e5d87 100644
--- a/src/jit/emitxarch.h
+++ b/src/jit/emitxarch.h
@@ -109,6 +109,16 @@ void SetUseSSE3_4(bool value)
}
bool Is4ByteSSE4Instruction(instruction ins);
+bool hasRexPrefix(code_t code)
+{
+#ifdef _TARGET_AMD64_
+ const code_t REX_PREFIX_MASK = 0xFF00000000LL;
+ return (code & REX_PREFIX_MASK) != 0;
+#else // !_TARGET_AMD64_
+ return false;
+#endif // !_TARGET_AMD64_
+}
+
#ifdef FEATURE_AVX_SUPPORT
// 3-byte VEX prefix starts with byte 0xC4
@@ -150,6 +160,26 @@ void SetUseAVX(bool value)
useAVXEncodings = value;
}
+bool containsAVXInstruction = false;
+bool ContainsAVX()
+{
+ return containsAVXInstruction;
+}
+void SetContainsAVX(bool value)
+{
+ containsAVXInstruction = value;
+}
+
+bool contains256bitAVXInstruction = false;
+bool Contains256bitAVX()
+{
+ return contains256bitAVXInstruction;
+}
+void SetContains256bitAVX(bool value)
+{
+ contains256bitAVXInstruction = value;
+}
+
bool IsThreeOperandBinaryAVXInstruction(instruction ins);
bool IsThreeOperandMoveAVXInstruction(instruction ins);
bool IsThreeOperandAVXInstruction(instruction ins)
@@ -158,7 +188,15 @@ bool IsThreeOperandAVXInstruction(instruction ins)
}
bool Is4ByteAVXInstruction(instruction ins);
#else // !FEATURE_AVX_SUPPORT
-bool UseAVX()
+bool UseAVX()
+{
+ return false;
+}
+bool ContainsAVX()
+{
+ return false;
+}
+bool Contains256bitAVX()
{
return false;
}
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 441569c339..50318b0940 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -8550,8 +8550,12 @@ void Compiler::fgAddInternal()
GenTreeStmt* Compiler::fgNewStmtFromTree(GenTreePtr tree, BasicBlock* block, IL_OFFSETX offs)
{
GenTreeStmt* stmt = gtNewStmt(tree, offs);
- gtSetStmtInfo(stmt);
- fgSetStmtSeq(stmt);
+
+ if (fgStmtListThreaded)
+ {
+ gtSetStmtInfo(stmt);
+ fgSetStmtSeq(stmt);
+ }
#if DEBUG
if (block != nullptr)
@@ -11654,6 +11658,7 @@ DONE:
void Compiler::fgClearFinallyTargetBit(BasicBlock* block)
{
+ assert(fgComputePredsDone);
assert((block->bbFlags & BBF_FINALLY_TARGET) != 0);
for (flowList* pred = block->bbPreds; pred; pred = pred->flNext)
@@ -12946,6 +12951,12 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc
optimizeJump = false;
}
+ // Don't optimize a jump to a cloned finally
+ if (bDest->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ optimizeJump = false;
+ }
+
#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
// Don't optimize a jump to a finally target. For BB1->BB2->BB3, where
// BB2 is a finally target, if we changed BB1 to jump directly to BB3,
@@ -13747,7 +13758,7 @@ bool Compiler::fgOptimizeBranchToNext(BasicBlock* block, BasicBlock* bNext, Basi
{
assert(block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_ALWAYS);
assert(block->bbJumpDest == bNext);
- assert(block->bbNext = bNext);
+ assert(block->bbNext == bNext);
assert(block->bbPrev == bPrev);
if (block->bbJumpKind == BBJ_ALWAYS)
@@ -17782,7 +17793,7 @@ void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
// Special handling for GT_LIST
if (tree->OperGet() == GT_LIST)
@@ -18004,8 +18015,8 @@ void Compiler::fgSetTreeSeqHelper(GenTreePtr tree, bool isLIR)
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
// Evaluate the trees left to right
- fgSetTreeSeqHelper(tree->gtBoundsChk.gtArrLen, isLIR);
fgSetTreeSeqHelper(tree->gtBoundsChk.gtIndex, isLIR);
+ fgSetTreeSeqHelper(tree->gtBoundsChk.gtArrLen, isLIR);
break;
case GT_STORE_DYN_BLK:
@@ -20318,7 +20329,7 @@ void Compiler::fgDebugCheckFlags(GenTreePtr tree)
else if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
// During GS work, we make shadow copies for params.
// In gsParamsToShadows(), we create a shadow var of TYP_INT for every small type param.
@@ -21970,6 +21981,13 @@ _Done:
compNeedsGSSecurityCookie |= InlineeCompiler->compNeedsGSSecurityCookie;
compGSReorderStackLayout |= InlineeCompiler->compGSReorderStackLayout;
+#ifdef FEATURE_SIMD
+ if (InlineeCompiler->usesSIMDTypes())
+ {
+ setUsesSIMDTypes(true);
+ }
+#endif // FEATURE_SIMD
+
// Update unmanaged call count
info.compCallUnmanaged += InlineeCompiler->info.compCallUnmanaged;
@@ -22471,3 +22489,1770 @@ void Compiler::fgLclFldAssign(unsigned lclNum)
lvaSetVarDoNotEnregister(lclNum DEBUGARG(DNER_LocalField));
}
}
+
+//------------------------------------------------------------------------
+// fgRemoveEmptyFinally: Remove try/finallys where the finally is empty
+//
+// Notes:
+// Removes all try/finallys in the method with empty finallys.
+// These typically arise from inlining empty Dispose methods.
+//
+// Converts callfinally to a jump to the finally continuation.
+// Removes the finally, and reparents all blocks in the try to the
+// enclosing try or method region.
+//
+// Currently limited to trivially empty finallys: those with one basic
+// block containing only a single RETFILT statement. It is possible, but
+// not likely, that more complex-looking finallys will eventually become
+// empty (from, say, subsequent optimization). An SPMI run that executed
+// just the "detection" part of this phase after optimization found only
+// one example where a new empty finally was detected.
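+//
+// A sketch of the flow rewrite (block numbers are illustrative only):
+//
+//    BB01 (try) --> BB02 [BBJ_CALLFINALLY] --> BB10 (empty finally)
+//    BB03 [BBJ_ALWAYS, paired with BB02] --> BB04 (continuation)
+//
+// becomes
+//
+//    BB01 --> BB02 [BBJ_ALWAYS] --> BB04
+//
+// with BB03, BB10, and the EH table entry removed, and the former try
+// blocks reparented to the enclosing region.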
+
+void Compiler::fgRemoveEmptyFinally()
+{
+ JITDUMP("\n*************** In fgRemoveEmptyFinally()\n");
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, nothing to remove.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no removal.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no removal.\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgRemoveEmptyFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Look for finallys or faults that are empty.
+ unsigned finallyCount = 0;
+ unsigned emptyCount = 0;
+ unsigned XTnum = 0;
+ while (XTnum < compHndBBtabCount)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+ // Check if this is a try/finally. We could also look for empty
+ // try/fault but presumably those are rare.
+ if (!HBtab->HasFinallyHandler())
+ {
+ JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ finallyCount++;
+
+ // Look at blocks involved.
+ BasicBlock* const firstBlock = HBtab->ebdHndBeg;
+ BasicBlock* const lastBlock = HBtab->ebdHndLast;
+
+ // Limit for now to finallys that are single blocks.
+ if (firstBlock != lastBlock)
+ {
+ JITDUMP("EH#%u finally has multiple basic blocks; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ // Limit for now to finallys that contain only a GT_RETFILT.
+ bool isEmpty = true;
+
+ for (GenTreeStmt* stmt = firstBlock->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr stmtExpr = stmt->gtStmtExpr;
+
+ if (stmtExpr->gtOper != GT_RETFILT)
+ {
+ isEmpty = false;
+ break;
+ }
+ }
+
+ if (!isEmpty)
+ {
+ JITDUMP("EH#%u finally is not empty; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ JITDUMP("EH#%u has empty finally, removing the region.\n", XTnum);
+
+ // Find all the call finallys that invoke this finally,
+ // and modify them to jump to the return point.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ BasicBlock* currentBlock = firstCallFinallyRangeBlock;
+
+ while (currentBlock != endCallFinallyRangeBlock)
+ {
+ BasicBlock* nextBlock = currentBlock->bbNext;
+
+ if ((currentBlock->bbJumpKind == BBJ_CALLFINALLY) && (currentBlock->bbJumpDest == firstBlock))
+ {
+ // Retarget the call finally to jump to the return
+ // point.
+ //
+ // We don't expect to see retless finallys here, since
+ // the finally is empty.
+ noway_assert(currentBlock->isBBCallAlwaysPair());
+
+ BasicBlock* const leaveBlock = currentBlock->bbNext;
+ BasicBlock* const postTryFinallyBlock = leaveBlock->bbJumpDest;
+
+ noway_assert(leaveBlock->bbJumpKind == BBJ_ALWAYS);
+
+ currentBlock->bbJumpDest = postTryFinallyBlock;
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+
+ // Ref count updates.
+ fgAddRefPred(postTryFinallyBlock, currentBlock);
+ // fgRemoveRefPred(firstBlock, currentBlock);
+
+ // Delete the leave block, which should be marked as
+ // keep always.
+ assert((leaveBlock->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0);
+ nextBlock = leaveBlock->bbNext;
+
+ leaveBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+ fgRemoveBlock(leaveBlock, true);
+
+ // Cleanup the postTryFinallyBlock
+ fgCleanupContinuation(postTryFinallyBlock);
+
+ // Make sure iteration isn't going off the deep end.
+ assert(leaveBlock != endCallFinallyRangeBlock);
+ }
+
+ currentBlock = nextBlock;
+ }
+
+ // Handler block should now be unreferenced, since the only
+ // explicit references to it were in call finallys.
+ firstBlock->bbRefs = 0;
+
+ // Remove the handler block.
+ const bool unreachable = true;
+ firstBlock->bbFlags &= ~BBF_DONT_REMOVE;
+ fgRemoveBlock(firstBlock, unreachable);
+
+ // Find enclosing try region for the try, if any, and update
+ // the try region. Note the handler region (if any) won't
+ // change.
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ assert(firstTryBlock->getTryIndex() == XTnum);
+
+ for (BasicBlock* block = firstTryBlock; block != nullptr; block = block->bbNext)
+ {
+ // Look for blocks directly contained in this try, and
+ // update the try region appropriately.
+ //
+ // Try region for blocks transitively contained (say in a
+ // child try) will get updated by the subsequent call to
+ // fgRemoveEHTableEntry.
+ if (block->getTryIndex() == XTnum)
+ {
+ if (firstBlock->hasTryIndex())
+ {
+ block->setTryIndex(firstBlock->getTryIndex());
+ }
+ else
+ {
+ block->clearTryIndex();
+ }
+ }
+
+ if (block == firstTryBlock)
+ {
+ assert((block->bbFlags & BBF_TRY_BEG) != 0);
+ block->bbFlags &= ~BBF_TRY_BEG;
+ }
+
+ if (block == lastTryBlock)
+ {
+ break;
+ }
+ }
+
+ // Remove the try-finally EH region. This will compact the EH table
+ // so XTnum now points at the next entry.
+ fgRemoveEHTableEntry(XTnum);
+
+ emptyCount++;
+ }
+
+ if (emptyCount > 0)
+ {
+ JITDUMP("fgRemoveEmptyFinally() removed %u try-finally clauses from %u finallys\n", emptyCount, finallyCount);
+ fgOptimizedFinally = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgRemoveEmptyFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(false, false);
+
+#endif // DEBUG
+ }
+}
+
+//------------------------------------------------------------------------
+// fgRemoveEmptyTry: Optimize try/finallys where the try is empty
+//
+// Notes:
+// In runtimes where thread abort is not possible, `try {} finally {S}`
+// can be optimized to simply `S`. This method looks for such
+// cases and removes the try-finally from the EH table, making
+// suitable flow, block flag, statement, and region updates.
+//
+// This optimization is not legal in runtimes that support thread
+// abort because those runtimes ensure that a finally is completely
+// executed before continuing to process the thread abort. With
+// this optimization, the code block `S` can lose special
+// within-finally status and so complete execution is no longer
+// guaranteed.
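+//
+// An illustrative source-level view of the transformation (the finally
+// body S is a placeholder):
+//
+//    try { } finally { S }   ==>   S
+//
+// The callfinally becomes a normal jump into the former handler blocks,
+// the handler's RETFILT exits become jumps to the continuation, and the
+// EH table entry is removed.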
+
+void Compiler::fgRemoveEmptyTry()
+{
+ JITDUMP("\n*************** In fgRemoveEmptyTry()\n");
+
+#ifdef FEATURE_CORECLR
+ bool enableRemoveEmptyTry = true;
+#else
+ // Code in a finally gets special treatment in the presence of
+ // thread abort.
+ bool enableRemoveEmptyTry = false;
+#endif // FEATURE_CORECLR
+
+#ifdef DEBUG
+ // Allow override to enable/disable.
+ enableRemoveEmptyTry = (JitConfig.JitEnableRemoveEmptyTry() == 1);
+#endif // DEBUG
+
+ if (!enableRemoveEmptyTry)
+ {
+ JITDUMP("Empty try removal disabled.\n");
+ return;
+ }
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, nothing to remove.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no removal.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no removal.\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgRemoveEmptyTry()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+#endif // DEBUG
+
+ // Look for try-finallys where the try is empty.
+ unsigned emptyCount = 0;
+ unsigned XTnum = 0;
+ while (XTnum < compHndBBtabCount)
+ {
+ EHblkDsc* const HBtab = &compHndBBtab[XTnum];
+
+ // Check if this is a try/finally. We could also look for empty
+ // try/fault but presumably those are rare.
+ if (!HBtab->HasFinallyHandler())
+ {
+ JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum);
+ XTnum++;
+ continue;
+ }
+
+ // Examine the try region
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ BasicBlock* const firstHandlerBlock = HBtab->ebdHndBeg;
+ BasicBlock* const lastHandlerBlock = HBtab->ebdHndLast;
+ BasicBlock* const endHandlerBlock = lastHandlerBlock->bbNext;
+
+ assert(firstTryBlock->getTryIndex() == XTnum);
+
+ // Limit for now to trys that contain only a callfinally pair
+ // or branch to same.
+ if (!firstTryBlock->isEmpty())
+ {
+ JITDUMP("EH#%u first try block BB%02u not empty; skipping.\n", XTnum, firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Look for blocks that are always jumps to a call finally
+ // pair that targets the finally
+ if (firstTryBlock->bbJumpKind != BBJ_ALWAYS)
+ {
+ JITDUMP("EH#%u first try block BB%02u not jump to a callfinally; skipping.\n", XTnum, firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+ BasicBlock* const callFinally = firstTryBlock->bbJumpDest;
+
+ // Look for call always pair. Note this will also disqualify
+ // empty try removal in cases where the finally doesn't
+ // return.
+ if (!callFinally->isBBCallAlwaysPair() || (callFinally->bbJumpDest != firstHandlerBlock))
+ {
+ JITDUMP("EH#%u first try block BB%02u always jumps but not to a callfinally; skipping.\n", XTnum,
+ firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+ // Try itself must be a single block.
+ if (firstTryBlock != lastTryBlock)
+ {
+ JITDUMP("EH#%u first try block BB%02u not only block in try; skipping.\n", XTnum,
+ firstTryBlock->bbNext->bbNum);
+ XTnum++;
+ continue;
+ }
+
+#else
+ // Look for call always pair within the try itself. Note this
+ // will also disqualify empty try removal in cases where the
+ // finally doesn't return.
+ if (!firstTryBlock->isBBCallAlwaysPair() || (firstTryBlock->bbJumpDest != firstHandlerBlock))
+ {
+ JITDUMP("EH#%u first try block BB%02u not a callfinally; skipping.\n", XTnum, firstTryBlock->bbNum);
+ XTnum++;
+ continue;
+ }
+
+ BasicBlock* const callFinally = firstTryBlock;
+
+ // Try must be a callalways pair of blocks.
+ if (firstTryBlock->bbNext != lastTryBlock)
+ {
+ JITDUMP("EH#%u block BB%02u not last block in try; skipping.\n", XTnum, firstTryBlock->bbNext->bbNum);
+ XTnum++;
+ continue;
+ }
+
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ JITDUMP("EH#%u has empty try, removing the try region and promoting the finally.\n", XTnum);
+
+ // There should be just one callfinally that invokes this
+ // finally, the one we found above. Verify this.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ bool verifiedSingleCallfinally = true;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ for (BasicBlock* block = firstCallFinallyRangeBlock; block != endCallFinallyRangeBlock; block = block->bbNext)
+ {
+ if ((block->bbJumpKind == BBJ_CALLFINALLY) && (block->bbJumpDest == firstHandlerBlock))
+ {
+ assert(block->isBBCallAlwaysPair());
+
+ if (block != callFinally)
+ {
+                    JITDUMP("EH#%u found unexpected callfinally BB%02u; skipping.\n", XTnum, block->bbNum);
+ verifiedSingleCallfinally = false;
+ break;
+ }
+
+ block = block->bbNext;
+ }
+ }
+
+ if (!verifiedSingleCallfinally)
+ {
+            JITDUMP("EH#%u -- unexpectedly -- has multiple callfinallys; skipping.\n", XTnum);
+ XTnum++;
+ assert(verifiedSingleCallfinally);
+ continue;
+ }
+
+ // Time to optimize.
+ //
+ // (1) Convert the callfinally to a normal jump to the handler
+ callFinally->bbJumpKind = BBJ_ALWAYS;
+
+ // Identify the leave block and the continuation
+ BasicBlock* const leave = callFinally->bbNext;
+ BasicBlock* const continuation = leave->bbJumpDest;
+
+ // (2) Cleanup the leave so it can be deleted by subsequent opts
+ assert((leave->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0);
+ leave->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+
+ // (3) Cleanup the continuation
+ fgCleanupContinuation(continuation);
+
+ // (4) Find enclosing try region for the try, if any, and
+ // update the try region for the blocks in the try. Note the
+ // handler region (if any) won't change.
+ //
+ // Kind of overkill to loop here, but hey.
+ for (BasicBlock* block = firstTryBlock; block != nullptr; block = block->bbNext)
+ {
+ // Look for blocks directly contained in this try, and
+ // update the try region appropriately.
+ //
+ // The try region for blocks transitively contained (say in a
+ // child try) will get updated by the subsequent call to
+ // fgRemoveEHTableEntry.
+ if (block->getTryIndex() == XTnum)
+ {
+ if (firstHandlerBlock->hasTryIndex())
+ {
+ block->setTryIndex(firstHandlerBlock->getTryIndex());
+ }
+ else
+ {
+ block->clearTryIndex();
+ }
+ }
+
+ if (block == firstTryBlock)
+ {
+ assert((block->bbFlags & BBF_TRY_BEG) != 0);
+ block->bbFlags &= ~BBF_TRY_BEG;
+ }
+
+ if (block == lastTryBlock)
+ {
+ break;
+ }
+ }
+
+ // (5) Update the directly contained handler blocks' handler index.
+ // Handler index of any nested blocks will update when we
+ // remove the EH table entry. Change handler exits to jump to
+ // the continuation. Clear catch type on handler entry.
+ for (BasicBlock* block = firstHandlerBlock; block != endHandlerBlock; block = block->bbNext)
+ {
+ if (block == firstHandlerBlock)
+ {
+ block->bbCatchTyp = BBCT_NONE;
+ }
+
+ if (block->getHndIndex() == XTnum)
+ {
+ if (firstTryBlock->hasHndIndex())
+ {
+ block->setHndIndex(firstTryBlock->getHndIndex());
+ }
+ else
+ {
+ block->clearHndIndex();
+ }
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ GenTreeStmt* finallyRet = block->lastStmt();
+ GenTreePtr finallyRetExpr = finallyRet->gtStmtExpr;
+ assert(finallyRetExpr->gtOper == GT_RETFILT);
+ fgRemoveStmt(block, finallyRet);
+ block->bbJumpKind = BBJ_ALWAYS;
+ block->bbJumpDest = continuation;
+ }
+ }
+ }
+
+ // (6) Remove the try-finally EH region. This will compact the
+ // EH table so XTnum now points at the next entry and will update
+ // the EH region indices of any nested EH in the (former) handler.
+ fgRemoveEHTableEntry(XTnum);
+
+ // Another one bites the dust...
+ emptyCount++;
+ }
+
+ if (emptyCount > 0)
+ {
+ JITDUMP("fgRemoveEmptyTry() optimized %u empty-try try-finally clauses\n", emptyCount);
+ fgOptimizedFinally = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgRemoveEmptyTry()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(false, false);
+
+#endif // DEBUG
+ }
+}
+
+//------------------------------------------------------------------------
+// fgCloneFinally: Optimize normal exit path from a try/finally
+//
+// Notes:
+// Handles finallys that are not enclosed by or enclosing other
+// handler regions.
+//
+// Converts the "normal exit" callfinally to a jump to a cloned copy
+// of the finally, which in turn jumps to the finally continuation.
+//
+// If all callfinallys for a given finally are converted to jump to
+// the clone, the try-finally is modified into a try-fault,
+// distinguishable from organic try-faults by handler type
+// EH_HANDLER_FAULT_WAS_FINALLY vs the organic EH_HANDLER_FAULT.
+//
+// Does not yet handle thread abort. The open issues here are how
+// to maintain the proper description of the cloned finally blocks
+// as a handler (for thread abort purposes), how to prevent code
+// motion in or out of these blocks, and how to report this cloned
+// handler to the runtime. Some building blocks for thread abort
+// exist (see below) but more work needed.
+//
+// The first and last blocks of the cloned finally are marked with
+// BBF_CLONED_FINALLY_BEGIN and BBF_CLONED_FINALLY_END. However
+// these markers currently can get lost during subsequent
+// optimizations.
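+//
+// A sketch of the normal-exit rewrite (block shapes illustrative only):
+//
+//    try ... --> callfinally --> finally F --> always --> continuation
+//
+// becomes
+//
+//    try ... --> cloned F' --> continuation
+//
+// while exceptional paths still run the original F which, if all
+// callfinallys were retargeted, is re-described as a fault handler.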
+
+void Compiler::fgCloneFinally()
+{
+ JITDUMP("\n*************** In fgCloneFinally()\n");
+
+#ifdef FEATURE_CORECLR
+ bool enableCloning = true;
+#else
+ // Finally cloning currently doesn't provide sufficient protection
+ // for the cloned code in the presence of thread abort.
+ bool enableCloning = false;
+#endif // FEATURE_CORECLR
+
+#ifdef DEBUG
+ // Allow override to enable/disable.
+ enableCloning = (JitConfig.JitEnableFinallyCloning() == 1);
+#endif // DEBUG
+
+ if (!enableCloning)
+ {
+ JITDUMP("Finally cloning disabled.\n");
+ return;
+ }
+
+ if (compHndBBtabCount == 0)
+ {
+ JITDUMP("No EH in this method, no cloning.\n");
+ return;
+ }
+
+ if (opts.MinOpts())
+ {
+ JITDUMP("Method compiled with minOpts, no cloning.\n");
+ return;
+ }
+
+ if (opts.compDbgCode)
+ {
+ JITDUMP("Method compiled with debug codegen, no cloning.\n");
+ return;
+ }
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** Before fgCloneFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ // Verify try-finally exits look good before we start.
+ fgDebugCheckTryFinallyExits();
+
+#endif // DEBUG
+
+ // Look for finallys that are not contained within other handlers,
+ // and which do not themselves contain EH.
+ //
+ // Note these cases potentially could be handled, but are less
+ // obviously profitable and require modification of the handler
+ // table.
+ unsigned XTnum = 0;
+ EHblkDsc* HBtab = compHndBBtab;
+ unsigned cloneCount = 0;
+ for (; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ // Check if this is a try/finally
+ if (!HBtab->HasFinallyHandler())
+ {
+ JITDUMP("EH#%u is not a try-finally; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Check if enclosed by another handler.
+ const unsigned enclosingHandlerRegion = ehGetEnclosingHndIndex(XTnum);
+
+ if (enclosingHandlerRegion != EHblkDsc::NO_ENCLOSING_INDEX)
+ {
+ JITDUMP("EH#%u is enclosed by handler EH#%u; skipping.\n", XTnum, enclosingHandlerRegion);
+ continue;
+ }
+
+ bool containsEH = false;
+ unsigned exampleEnclosedHandlerRegion = 0;
+
+ // Only need to look at lower numbered regions because the
+ // handler table is ordered by nesting.
+ for (unsigned i = 0; i < XTnum; i++)
+ {
+ if (ehGetEnclosingHndIndex(i) == XTnum)
+ {
+ exampleEnclosedHandlerRegion = i;
+ containsEH = true;
+ break;
+ }
+ }
+
+ if (containsEH)
+ {
+ JITDUMP("Finally for EH#%u encloses handler EH#%u; skipping.\n", XTnum, exampleEnclosedHandlerRegion);
+ continue;
+ }
+
+ // Look at blocks involved.
+ BasicBlock* const firstBlock = HBtab->ebdHndBeg;
+ BasicBlock* const lastBlock = HBtab->ebdHndLast;
+ assert(firstBlock != nullptr);
+ assert(lastBlock != nullptr);
+ BasicBlock* nextBlock = lastBlock->bbNext;
+ unsigned regionBBCount = 0;
+ unsigned regionStmtCount = 0;
+ bool hasFinallyRet = false;
+ bool isAllRare = true;
+ bool hasSwitch = false;
+
+ for (const BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
+ {
+ if (block->bbJumpKind == BBJ_SWITCH)
+ {
+ hasSwitch = true;
+ break;
+ }
+
+ regionBBCount++;
+
+ // Should we compute statement cost here, or is it
+ // premature...? For now just count statements I guess.
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ regionStmtCount++;
+ }
+
+ hasFinallyRet = hasFinallyRet || (block->bbJumpKind == BBJ_EHFINALLYRET);
+ isAllRare = isAllRare && block->isRunRarely();
+ }
+
+ // Skip cloning if the finally has a switch.
+ if (hasSwitch)
+ {
+ JITDUMP("Finally in EH#%u has a switch; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Skip cloning if the finally must throw.
+ if (!hasFinallyRet)
+ {
+ JITDUMP("Finally in EH#%u does not return; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Skip cloning if the finally is rarely run code.
+ if (isAllRare)
+ {
+ JITDUMP("Finally in EH#%u is run rarely; skipping.\n", XTnum);
+ continue;
+ }
+
+ // Empirical studies from CoreCLR and CoreFX show that less
+        // than 1% of finally regions have more than 15
+ // statements. So, to avoid potentially excessive code growth,
+ // only clone finallys that have 15 or fewer statements.
+ const unsigned stmtCountLimit = 15;
+ if (regionStmtCount > stmtCountLimit)
+ {
+ JITDUMP("Finally in EH#%u has %u statements, limit is %u; skipping.\n", XTnum, regionStmtCount,
+ stmtCountLimit);
+ continue;
+ }
+
+ JITDUMP("EH#%u is a candidate for finally cloning:"
+ " %u blocks, %u statements\n",
+ XTnum, regionBBCount, regionStmtCount);
+
+ // Walk the try region backwards looking for the last block
+ // that transfers control to a callfinally.
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ assert(firstTryBlock->getTryIndex() == XTnum);
+ assert(lastTryBlock->getTryIndex() == XTnum);
+ BasicBlock* const beforeTryBlock = firstTryBlock->bbPrev;
+
+ BasicBlock* normalCallFinallyBlock = nullptr;
+ BasicBlock* normalCallFinallyReturn = nullptr;
+ BasicBlock* cloneInsertAfter = HBtab->ebdTryLast;
+ bool tryToRelocateCallFinally = false;
+
+ for (BasicBlock* block = lastTryBlock; block != beforeTryBlock; block = block->bbPrev)
+ {
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ // Look for blocks that are always jumps to a call finally
+ // pair that targets our finally.
+ if (block->bbJumpKind != BBJ_ALWAYS)
+ {
+ continue;
+ }
+
+ BasicBlock* const jumpDest = block->bbJumpDest;
+
+ if (!jumpDest->isBBCallAlwaysPair() || (jumpDest->bbJumpDest != firstBlock))
+ {
+ continue;
+ }
+#else
+ // Look for call finally pair directly within the try
+ if (!block->isBBCallAlwaysPair() || (block->bbJumpDest != firstBlock))
+ {
+ continue;
+ }
+
+ BasicBlock* const jumpDest = block;
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Found our block.
+ BasicBlock* const finallyReturnBlock = jumpDest->bbNext;
+ BasicBlock* const postTryFinallyBlock = finallyReturnBlock->bbJumpDest;
+
+ normalCallFinallyBlock = jumpDest;
+ normalCallFinallyReturn = postTryFinallyBlock;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ // When there are callfinally thunks, we don't expect to see the
+ // callfinally within a handler region either.
+ assert(!jumpDest->hasHndIndex());
+
+ // Update the clone insertion point to just after the
+ // call always pair.
+ cloneInsertAfter = finallyReturnBlock;
+
+ // We will consider moving the callfinally so we can fall
+ // through from the try into the clone.
+ tryToRelocateCallFinally = true;
+
+ JITDUMP("Chose path to clone: try block BB%02u jumps to callfinally at BB%02u;"
+ " the call returns to BB%02u which jumps to BB%02u\n",
+ block->bbNum, jumpDest->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum);
+#else
+ JITDUMP("Chose path to clone: try block BB%02u is a callfinally;"
+ " the call returns to BB%02u which jumps to BB%02u\n",
+ block->bbNum, finallyReturnBlock->bbNum, postTryFinallyBlock->bbNum);
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ break;
+ }
+
+ // If there is no call to the finally, don't clone.
+ if (normalCallFinallyBlock == nullptr)
+ {
+ JITDUMP("EH#%u: no calls from the try to the finally, skipping.\n", XTnum);
+ continue;
+ }
+
+ JITDUMP("Will update callfinally block BB%02u to jump to the clone;"
+ " clone will jump to BB%02u\n",
+ normalCallFinallyBlock->bbNum, normalCallFinallyReturn->bbNum);
+
+ // If there are multiple callfinallys and we're in the
+ // callfinally thunk model, all the callfinallys are placed
+ // just outside the try region. We'd like our chosen
+ // callfinally to come first after the try, so we can fall out of the try
+ // into the clone.
+ BasicBlock* firstCallFinallyRangeBlock = nullptr;
+ BasicBlock* endCallFinallyRangeBlock = nullptr;
+ ehGetCallFinallyBlockRange(XTnum, &firstCallFinallyRangeBlock, &endCallFinallyRangeBlock);
+
+ if (tryToRelocateCallFinally)
+ {
+ BasicBlock* firstCallFinallyBlock = nullptr;
+
+ for (BasicBlock* block = firstCallFinallyRangeBlock; block != endCallFinallyRangeBlock;
+ block = block->bbNext)
+ {
+ if (block->isBBCallAlwaysPair())
+ {
+ if (block->bbJumpDest == firstBlock)
+ {
+ firstCallFinallyBlock = block;
+ break;
+ }
+ }
+ }
+
+ // We better have found at least one call finally.
+ assert(firstCallFinallyBlock != nullptr);
+
+ // If there is more than one callfinally, move the one we are
+ // going to retarget to be first in the callfinally range.
+ if (firstCallFinallyBlock != normalCallFinallyBlock)
+ {
+ JITDUMP("Moving callfinally BB%02u to be first in line, before BB%02u\n", normalCallFinallyBlock->bbNum,
+ firstCallFinallyBlock->bbNum);
+
+ BasicBlock* const firstToMove = normalCallFinallyBlock;
+ BasicBlock* const lastToMove = normalCallFinallyBlock->bbNext;
+ BasicBlock* const placeToMoveAfter = firstCallFinallyBlock->bbPrev;
+
+ fgUnlinkRange(firstToMove, lastToMove);
+ fgMoveBlocksAfter(firstToMove, lastToMove, placeToMoveAfter);
+
+#ifdef DEBUG
+ // Sanity checks
+ fgDebugCheckBBlist(false, false);
+ fgVerifyHandlerTab();
+#endif // DEBUG
+
+ assert(nextBlock == lastBlock->bbNext);
+
+ // Update where the callfinally range begins, since we might
+ // have altered this with callfinally rearrangement, and/or
+ // the range begin might have been pretty loose to begin with.
+ firstCallFinallyRangeBlock = normalCallFinallyBlock;
+ }
+ }
+
+ // Clone the finally and retarget the normal return path and
+ // any other path that happens to share that same return
+ // point. For instance a construct like:
+ //
+ // try { } catch { } finally { }
+ //
+ // will have two call finally blocks, one for the normal exit
+        // from the try, and the other for the exit from the
+ // catch. They'll both pass the same return point which is the
+ // statement after the finally, so they can share the clone.
+ //
+ // Clone the finally body, and splice it into the flow graph
+        // within the parent region of the try.
+ const unsigned finallyTryIndex = firstBlock->bbTryIndex;
+ BasicBlock* insertAfter = nullptr;
+ BlockToBlockMap blockMap(getAllocator());
+ bool clonedOk = true;
+ unsigned cloneBBCount = 0;
+
+ for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
+ {
+ BasicBlock* newBlock;
+
+ if (block == firstBlock)
+ {
+                // Put the first cloned finally block into the appropriate
+ // region, somewhere within or after the range of
+ // callfinallys, depending on the EH implementation.
+ const unsigned hndIndex = 0;
+ BasicBlock* const nearBlk = cloneInsertAfter;
+ newBlock = fgNewBBinRegion(block->bbJumpKind, finallyTryIndex, hndIndex, nearBlk);
+
+ // If the clone ends up just after the finally, adjust
+ // the stopping point for finally traversal.
+ if (newBlock->bbNext == nextBlock)
+ {
+ assert(newBlock->bbPrev == lastBlock);
+ nextBlock = newBlock;
+ }
+ }
+ else
+ {
+ // Put subsequent blocks in the same region...
+ const bool extendRegion = true;
+ newBlock = fgNewBBafter(block->bbJumpKind, insertAfter, extendRegion);
+ }
+
+ cloneBBCount++;
+ assert(cloneBBCount <= regionBBCount);
+
+ insertAfter = newBlock;
+ blockMap.Set(block, newBlock);
+
+ clonedOk = BasicBlock::CloneBlockState(this, newBlock, block);
+
+ if (!clonedOk)
+ {
+ break;
+ }
+
+ // Update block flags. Note a block can be both first and last.
+ if (block == firstBlock)
+ {
+ // Mark the block as the start of the cloned finally.
+ newBlock->bbFlags |= BBF_CLONED_FINALLY_BEGIN;
+ }
+
+ if (block == lastBlock)
+ {
+ // Mark the block as the end of the cloned finally.
+ newBlock->bbFlags |= BBF_CLONED_FINALLY_END;
+ }
+
+ // Make sure clone block state hasn't munged the try region.
+ assert(newBlock->bbTryIndex == finallyTryIndex);
+
+ // Cloned handler block is no longer within the handler.
+ newBlock->clearHndIndex();
+
+ // Jump dests are set in a post-pass; make sure CloneBlockState hasn't tried to set them.
+ assert(newBlock->bbJumpDest == nullptr);
+ }
+
+ if (!clonedOk)
+ {
+ // TODO: cleanup the partial clone?
+ JITDUMP("Unable to clone the finally; skipping.\n");
+ continue;
+ }
+
+ // We should have cloned all the finally region blocks.
+ assert(cloneBBCount == regionBBCount);
+
+ JITDUMP("Cloned finally blocks are: BB%2u ... BB%2u\n", blockMap[firstBlock]->bbNum,
+ blockMap[lastBlock]->bbNum);
+
+        // Redirect any branches within the newly-cloned
+ // finally, and any finally returns to jump to the return
+ // point.
+ for (BasicBlock* block = firstBlock; block != nextBlock; block = block->bbNext)
+ {
+ BasicBlock* newBlock = blockMap[block];
+
+ if (block->bbJumpKind == BBJ_EHFINALLYRET)
+ {
+ GenTreeStmt* finallyRet = newBlock->lastStmt();
+ GenTreePtr finallyRetExpr = finallyRet->gtStmtExpr;
+ assert(finallyRetExpr->gtOper == GT_RETFILT);
+ fgRemoveStmt(newBlock, finallyRet);
+ newBlock->bbJumpKind = BBJ_ALWAYS;
+ newBlock->bbJumpDest = normalCallFinallyReturn;
+
+ fgAddRefPred(normalCallFinallyReturn, newBlock);
+ }
+ else
+ {
+ optCopyBlkDest(block, newBlock);
+ optRedirectBlock(newBlock, &blockMap);
+ }
+ }
+
+ // Modify the targeting call finallys to branch to the cloned
+ // finally. Make a note if we see some calls that can't be
+ // retargeted (since they want to return to other places).
+ BasicBlock* const firstCloneBlock = blockMap[firstBlock];
+ bool retargetedAllCalls = true;
+ BasicBlock* currentBlock = firstCallFinallyRangeBlock;
+
+ while (currentBlock != endCallFinallyRangeBlock)
+ {
+ BasicBlock* nextBlockToScan = currentBlock->bbNext;
+
+ if (currentBlock->isBBCallAlwaysPair())
+ {
+ if (currentBlock->bbJumpDest == firstBlock)
+ {
+ BasicBlock* const leaveBlock = currentBlock->bbNext;
+ BasicBlock* const postTryFinallyBlock = leaveBlock->bbJumpDest;
+
+ // Note we must retarget all callfinallies that have this
+ // continuation, or we can't clean up the continuation
+ // block properly below, since it will be reachable both
+ // by the cloned finally and by the called finally.
+ if (postTryFinallyBlock == normalCallFinallyReturn)
+ {
+ // This call returns to the expected spot, so
+ // retarget it to branch to the clone.
+ currentBlock->bbJumpDest = firstCloneBlock;
+ currentBlock->bbJumpKind = BBJ_ALWAYS;
+
+ // Ref count updates.
+ fgAddRefPred(firstCloneBlock, currentBlock);
+ // fgRemoveRefPred(firstBlock, currentBlock);
+
+ // Delete the leave block, which should be marked as
+ // keep always.
+ assert((leaveBlock->bbFlags & BBF_KEEP_BBJ_ALWAYS) != 0);
+ nextBlock = leaveBlock->bbNext;
+
+ leaveBlock->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+ fgRemoveBlock(leaveBlock, true);
+
+ // Make sure iteration isn't going off the deep end.
+ assert(leaveBlock != endCallFinallyRangeBlock);
+ }
+ else
+ {
+ // We can't retarget this call since it
+ // returns somewhere else.
+ retargetedAllCalls = false;
+ }
+ }
+ }
+
+ currentBlock = nextBlockToScan;
+ }
+
+ // If we retargeted all calls, modify EH descriptor to be
+ // try-fault instead of try-finally, and then non-cloned
+ // finally catch type to be fault.
+ if (retargetedAllCalls)
+ {
+ JITDUMP("All callfinallys retargeted; changing finally to fault.\n");
+ HBtab->ebdHandlerType = EH_HANDLER_FAULT_WAS_FINALLY;
+ firstBlock->bbCatchTyp = BBCT_FAULT;
+ }
+ else
+ {
+ JITDUMP("Some callfinallys *not* retargeted, so region must remain as a finally.\n");
+ }
+
+ // Modify first block of cloned finally to be a "normal" block.
+ BasicBlock* firstClonedBlock = blockMap[firstBlock];
+ firstClonedBlock->bbCatchTyp = BBCT_NONE;
+
+        // Cleanup the continuation
+ fgCleanupContinuation(normalCallFinallyReturn);
+
+        // TODO: mark cloned blocks as a cloned finally.
+
+ // Done!
+ JITDUMP("\nDone with EH#%u\n\n", XTnum);
+ cloneCount++;
+ }
+
+ if (cloneCount > 0)
+ {
+ JITDUMP("fgCloneFinally() cloned %u finally handlers\n", cloneCount);
+ fgOptimizedFinally = true;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\n*************** After fgCloneFinally()\n");
+ fgDispBasicBlocks();
+ fgDispHandlerTab();
+ printf("\n");
+ }
+
+ fgVerifyHandlerTab();
+ fgDebugCheckBBlist(false, false);
+ fgDebugCheckTryFinallyExits();
+
+#endif // DEBUG
+ }
+}
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// fgDebugCheckTryFinallyExits: validate normal flow from try-finally
+// or try-fault-was-finally.
+//
+// Notes:
+//
+// Normal control flow exiting the try block of a try-finally must
+// pass through the finally. This checker attempts to verify that by
+// looking at the control flow graph.
+//
+// Each path that exits the try of a try-finally (including try-faults
+// that were optimized into try-finallys by fgCloneFinally) should
+// thus either execute a callfinally to the associated finally or else
+// jump to a block with the BBF_CLONED_FINALLY_BEGIN flag set.
+//
+// Depending on when this check is done, there may also be an empty
+// block along the path.
+//
+// Depending on the model for invoking finallys, the callfinallies may
+// lie within the try region (callfinally thunks) or in the enclosing
+// region.
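+//
+// For example (illustrative): a BBJ_ALWAYS jump from a block inside the
+// try directly to the post-finally continuation, bypassing both the
+// callfinally and any cloned finally, would be reported as invalid.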
+
+void Compiler::fgDebugCheckTryFinallyExits()
+{
+ unsigned XTnum = 0;
+ EHblkDsc* HBtab = compHndBBtab;
+ unsigned cloneCount = 0;
+ bool allTryExitsValid = true;
+ for (; XTnum < compHndBBtabCount; XTnum++, HBtab++)
+ {
+ const EHHandlerType handlerType = HBtab->ebdHandlerType;
+ const bool isFinally = (handlerType == EH_HANDLER_FINALLY);
+ const bool wasFinally = (handlerType == EH_HANDLER_FAULT_WAS_FINALLY);
+
+ // Screen out regions that are or were not finallys.
+ if (!isFinally && !wasFinally)
+ {
+ continue;
+ }
+
+ // Walk blocks of the try, looking for normal control flow to
+ // an ancestor region.
+
+ BasicBlock* const firstTryBlock = HBtab->ebdTryBeg;
+ BasicBlock* const lastTryBlock = HBtab->ebdTryLast;
+ assert(firstTryBlock->getTryIndex() <= XTnum);
+ assert(lastTryBlock->getTryIndex() <= XTnum);
+ BasicBlock* const afterTryBlock = lastTryBlock->bbNext;
+ BasicBlock* const finallyBlock = isFinally ? HBtab->ebdHndBeg : nullptr;
+
+ for (BasicBlock* block = firstTryBlock; block != afterTryBlock; block = block->bbNext)
+ {
+ // Only check the directly contained blocks.
+ assert(block->hasTryIndex());
+
+ if (block->getTryIndex() != XTnum)
+ {
+ continue;
+ }
+
+ // Look at each of the normal control flow possibilities.
+ const unsigned numSuccs = block->NumSucc();
+
+ for (unsigned i = 0; i < numSuccs; i++)
+ {
+ BasicBlock* const succBlock = block->GetSucc(i);
+
+ if (succBlock->hasTryIndex() && succBlock->getTryIndex() <= XTnum)
+ {
+ // Successor does not exit this try region.
+ continue;
+ }
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+
+ // When there are callfinally thunks, callfinallies
+ // logically "belong" to a child region and the exit
+ // path validity will be checked when looking at the
+ // try blocks in that region.
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ continue;
+ }
+
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ // Now we know block lies directly within the try of a
+ // try-finally, and succBlock is in an enclosing
+ // region (possibly the method region). So this path
+ // represents flow out of the try and should be
+ // checked.
+ //
+ // There are various ways control can properly leave a
+ // try-finally (or try-fault-was-finally):
+ //
+ // (a1) via a jump to a callfinally (only for finallys, only for call finally thunks)
+ // (a2) via a callfinally (only for finallys, only for !call finally thunks)
+ // (b) via a jump to a begin finally clone block
+ // (c) via a jump to an empty block to (b)
+ // (d) via a fallthrough to an empty block to (b)
+ // (e) via the always half of a callfinally pair
+ // (f) via an always jump clonefinally exit
+ bool isCallToFinally = false;
+
+#if FEATURE_EH_CALLFINALLY_THUNKS
+ if (succBlock->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // case (a1)
+ isCallToFinally = isFinally && (succBlock->bbJumpDest == finallyBlock);
+ }
+#else
+ if (block->bbJumpKind == BBJ_CALLFINALLY)
+ {
+ // case (a2)
+ isCallToFinally = isFinally && (block->bbJumpDest == finallyBlock);
+ }
+#endif // FEATURE_EH_CALLFINALLY_THUNKS
+
+ bool isJumpToClonedFinally = false;
+
+ if (succBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ // case (b)
+ isJumpToClonedFinally = true;
+ }
+ else if (succBlock->bbJumpKind == BBJ_ALWAYS)
+ {
+ if (succBlock->isEmpty())
+ {
+ // case (c)
+ BasicBlock* const succSuccBlock = succBlock->bbJumpDest;
+
+ if (succSuccBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ isJumpToClonedFinally = true;
+ }
+ }
+ }
+ else if (succBlock->bbJumpKind == BBJ_NONE)
+ {
+ if (succBlock->isEmpty())
+ {
+ BasicBlock* const succSuccBlock = succBlock->bbNext;
+
+ // case (d)
+ if (succSuccBlock->bbFlags & BBF_CLONED_FINALLY_BEGIN)
+ {
+ isJumpToClonedFinally = true;
+ }
+ }
+ }
+
+ bool isReturnFromFinally = false;
+
+ // Case (e). Ideally we'd have something stronger to
+ // check here -- eg that we are returning from a call
+ // to the right finally -- but there are odd cases
+ // like orphaned second halves of callfinally pairs
+ // that we need to tolerate.
+ if (block->bbFlags & BBF_KEEP_BBJ_ALWAYS)
+ {
+ isReturnFromFinally = true;
+ }
+
+ // Case (f)
+ if (block->bbFlags & BBF_CLONED_FINALLY_END)
+ {
+ isReturnFromFinally = true;
+ }
+
+ const bool thisExitValid = isCallToFinally || isJumpToClonedFinally || isReturnFromFinally;
+
+ if (!thisExitValid)
+ {
+                    JITDUMP("fgCheckTryFinallyExits: EH#%u exit via BB%02u -> BB%02u is invalid\n", XTnum, block->bbNum,
+ succBlock->bbNum);
+ }
+
+ allTryExitsValid = allTryExitsValid & thisExitValid;
+ }
+ }
+ }
+
+ if (!allTryExitsValid)
+ {
+ JITDUMP("fgCheckTryFinallyExits: method contains invalid try exit paths\n");
+ assert(allTryExitsValid);
+ }
+}
+
+#endif // DEBUG
+
+//------------------------------------------------------------------------
+// fgCleanupContinuation: cleanup a finally continuation after a
+// finally is removed or converted to normal control flow.
+//
+// Notes:
+// The continuation is the block targeted by the second half of
+// a callfinally/always pair.
+//
+// Used by finally cloning, empty try removal, and empty
+// finally removal.
+//
+// BBF_FINALLY_TARGET bbFlag is left unchanged by this method
+// since it cannot be incrementally updated. Proper updates happen
+// when fgUpdateFinallyTargetFlags runs after all finally optimizations.
+
+void Compiler::fgCleanupContinuation(BasicBlock* continuation)
+{
+ // The continuation may be a finalStep block.
+ // It is now a normal block, so clear the special keep
+ // always flag.
+ continuation->bbFlags &= ~BBF_KEEP_BBJ_ALWAYS;
+
+#if !FEATURE_EH_FUNCLETS
+    // Remove the GT_END_LFIN from the continuation.
+    // Note that we only expect to see one such statement.
+ bool foundEndLFin = false;
+ for (GenTreeStmt* stmt = continuation->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ GenTreePtr expr = stmt->gtStmtExpr;
+ if (expr->gtOper == GT_END_LFIN)
+ {
+ assert(!foundEndLFin);
+ fgRemoveStmt(continuation, stmt);
+ foundEndLFin = true;
+ }
+ }
+ assert(foundEndLFin);
+#endif // !FEATURE_EH_FUNCLETS
+}
+
+//------------------------------------------------------------------------
+// fgUpdateFinallyTargetFlags: recompute BBF_FINALLY_TARGET bits for all blocks
+// after finally optimizations have run.
+
+void Compiler::fgUpdateFinallyTargetFlags()
+{
+#if FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+
+ // Any fixup required?
+ if (!fgOptimizedFinally)
+ {
+ JITDUMP("In fgUpdateFinallyTargetFlags - no finally opts, no fixup required\n");
+ return;
+ }
+
+ JITDUMP("In fgUpdateFinallyTargetFlags, updating finally target flag bits\n");
+
+ // Walk all blocks, and reset the target bits.
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ block->bbFlags &= ~BBF_FINALLY_TARGET;
+ }
+
+ // Walk all blocks again, and set the target bits.
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ if (block->isBBCallAlwaysPair())
+ {
+ BasicBlock* const leave = block->bbNext;
+ BasicBlock* const continuation = leave->bbJumpDest;
+
+ if ((continuation->bbFlags & BBF_FINALLY_TARGET) == 0)
+ {
+ JITDUMP("Found callfinally BB%02u; setting finally target bit on BB%02u\n", block->bbNum,
+ continuation->bbNum);
+
+ continuation->bbFlags |= BBF_FINALLY_TARGET;
+ }
+ }
+ }
+#endif // FEATURE_EH_FUNCLETS && defined(_TARGET_ARM_)
+}
+
+// FatCalliTransformer transforms calli instructions that may use a fat function pointer.
+// A fat function pointer is a pointer with its second least significant bit set;
+// when the bit is set, the pointer (after clearing the bit) actually points to
+// a tuple <method pointer, instantiation argument pointer> where the
+// instantiation argument is a hidden first argument required by the method pointer.
+//
+// Fat pointers are used in CoreRT as a replacement for instantiating stubs,
+// because CoreRT cannot generate stubs at runtime.
+//
+// The JIT is responsible for checking the bit: if it is not set, it makes the regular call;
+// otherwise it loads the hidden argument, clears the bit in the pointer, and makes a call
+// with the fixed pointer and the instantiation argument.
+//
+// before:
+// current block
+// {
+// previous statements
+// transforming statement
+// {
+// call with GTF_CALL_M_FAT_POINTER_CHECK flag set in function ptr
+// }
+// subsequent statements
+// }
+//
+// after:
+// current block
+// {
+// previous statements
+// } BBJ_NONE check block
+// check block
+// {
+//     jump to else block if the fat pointer bit is set in the function ptr.
+// } BBJ_COND then block, else block
+// then block
+// {
+// original statement
+// } BBJ_ALWAYS remainder block
+// else block
+// {
+// unset GTF_CALL_M_FAT_POINTER_CHECK
+// load actual function pointer
+// load instantiation argument
+// create newArgList = (instantiation argument, original argList)
+// call (actual function pointer, newArgList)
+// } BBJ_NONE remainder block
+// remainder block
+// {
+// subsequent statements
+// }
+//
+class FatCalliTransformer
+{
+public:
+ FatCalliTransformer(Compiler* compiler) : compiler(compiler)
+ {
+ }
+
+ //------------------------------------------------------------------------
+ // Run: run transformation for each block.
+ //
+ void Run()
+ {
+ for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ TransformBlock(block);
+ }
+ }
+
+private:
+ //------------------------------------------------------------------------
+ // TransformBlock: look through statements and transform statements with fat pointer calls.
+ //
+ void TransformBlock(BasicBlock* block)
+ {
+ for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ if (ContainsFatCalli(stmt))
+ {
+ StatementTransformer stmtTransformer(compiler, block, stmt);
+ stmtTransformer.Run();
+ }
+ }
+ }
+
+ //------------------------------------------------------------------------
+    // ContainsFatCalli: check whether this statement contains a fat pointer call.
+    //
+    // Checks the fatPointerCandidate in the form call() or lclVar = call().
+    //
+    // Return Value:
+    //    true if it does, false otherwise.
+ //
+ bool ContainsFatCalli(GenTreeStmt* stmt)
+ {
+ GenTreePtr fatPointerCandidate = stmt->gtStmtExpr;
+ if (fatPointerCandidate->OperIsAssignment())
+ {
+ fatPointerCandidate = fatPointerCandidate->gtGetOp2();
+ }
+ return fatPointerCandidate->IsCall() && fatPointerCandidate->AsCall()->IsFatPointerCandidate();
+ }
+
+ class StatementTransformer
+ {
+ public:
+ StatementTransformer(Compiler* compiler, BasicBlock* block, GenTreeStmt* stmt)
+ : compiler(compiler), currBlock(block), stmt(stmt)
+ {
+ remainderBlock = nullptr;
+ checkBlock = nullptr;
+ thenBlock = nullptr;
+ elseBlock = nullptr;
+ doesReturnValue = stmt->gtStmtExpr->OperIsAssignment();
+ origCall = GetCall(stmt);
+ fptrAddress = origCall->gtCallAddr;
+ pointerType = fptrAddress->TypeGet();
+ }
+
+ //------------------------------------------------------------------------
+ // Run: transform the statement as described above.
+ //
+ void Run()
+ {
+ ClearFatFlag();
+ CreateRemainder();
+ CreateCheck();
+ CreateThen();
+ CreateElse();
+
+ RemoveOldStatement();
+ SetWeights();
+ ChainFlow();
+ }
+
+ private:
+ //------------------------------------------------------------------------
+ // GetCall: find a call in a statement.
+ //
+ // Arguments:
+ // callStmt - the statement with the call inside.
+ //
+ // Return Value:
+ // call tree node pointer.
+ GenTreeCall* GetCall(GenTreeStmt* callStmt)
+ {
+ GenTreePtr tree = callStmt->gtStmtExpr;
+ GenTreeCall* call = nullptr;
+ if (doesReturnValue)
+ {
+ assert(tree->OperIsAssignment());
+ call = tree->gtGetOp2()->AsCall();
+ }
+ else
+ {
+ call = tree->AsCall(); // call with void return type.
+ }
+ return call;
+ }
+
+ //------------------------------------------------------------------------
+ // ClearFatFlag: clear fat pointer candidate flag from the original call.
+ //
+ void ClearFatFlag()
+ {
+ origCall->ClearFatPointerCandidate();
+ }
+
+ //------------------------------------------------------------------------
+ // CreateRemainder: split current block at the fat call stmt and
+ // insert statements after the call into remainderBlock.
+ //
+ void CreateRemainder()
+ {
+ remainderBlock = compiler->fgSplitBlockAfterStatement(currBlock, stmt);
+ unsigned propagateFlags = currBlock->bbFlags & BBF_GC_SAFE_POINT;
+ remainderBlock->bbFlags |= BBF_JMP_TARGET | BBF_HAS_LABEL | propagateFlags;
+ }
+
+ //------------------------------------------------------------------------
+        // CreateCheck: create the check block, which tests whether the fat pointer bit is set.
+ //
+ void CreateCheck()
+ {
+ checkBlock = CreateAndInsertBasicBlock(BBJ_COND, currBlock);
+ GenTreePtr fatPointerMask = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, FAT_POINTER_MASK);
+ GenTreePtr fptrAddressCopy = compiler->gtCloneExpr(fptrAddress);
+ GenTreePtr fatPointerAnd = compiler->gtNewOperNode(GT_AND, TYP_I_IMPL, fptrAddressCopy, fatPointerMask);
+ GenTreePtr zero = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, 0);
+ GenTreePtr fatPointerCmp = compiler->gtNewOperNode(GT_NE, TYP_INT, fatPointerAnd, zero);
+ GenTreePtr jmpTree = compiler->gtNewOperNode(GT_JTRUE, TYP_VOID, fatPointerCmp);
+ GenTreePtr jmpStmt = compiler->fgNewStmtFromTree(jmpTree, stmt->gtStmt.gtStmtILoffsx);
+ compiler->fgInsertStmtAtEnd(checkBlock, jmpStmt);
+ }
+
+ //------------------------------------------------------------------------
+        // CreateThen: create the then block, which is executed if the call address is not a fat pointer.
+ //
+ void CreateThen()
+ {
+ thenBlock = CreateAndInsertBasicBlock(BBJ_ALWAYS, checkBlock);
+ GenTreePtr nonFatCallStmt = compiler->gtCloneExpr(stmt)->AsStmt();
+ compiler->fgInsertStmtAtEnd(thenBlock, nonFatCallStmt);
+ }
+
+ //------------------------------------------------------------------------
+        // CreateElse: create the else block, which is executed if the call address is a fat pointer.
+ //
+ void CreateElse()
+ {
+ elseBlock = CreateAndInsertBasicBlock(BBJ_NONE, thenBlock);
+
+ GenTreePtr fixedFptrAddress = GetFixedFptrAddress();
+ GenTreePtr actualCallAddress = compiler->gtNewOperNode(GT_IND, pointerType, fixedFptrAddress);
+ GenTreePtr hiddenArgument = GetHiddenArgument(fixedFptrAddress);
+
+ GenTreeStmt* fatStmt = CreateFatCallStmt(actualCallAddress, hiddenArgument);
+ compiler->fgInsertStmtAtEnd(elseBlock, fatStmt);
+ }
+
+ //------------------------------------------------------------------------
+        // CreateAndInsertBasicBlock: ask the compiler to create a new basic block
+        // and insert it into the basic block list.
+ //
+ // Arguments:
+ // jumpKind - jump kind for the new basic block
+        //    insertAfter - the basic block after which the compiler inserts the new one.
+ //
+ // Return Value:
+ // new basic block.
+ BasicBlock* CreateAndInsertBasicBlock(BBjumpKinds jumpKind, BasicBlock* insertAfter)
+ {
+ BasicBlock* block = compiler->fgNewBBafter(jumpKind, insertAfter, true);
+ if ((insertAfter->bbFlags & BBF_INTERNAL) == 0)
+ {
+ block->bbFlags &= ~BBF_INTERNAL;
+ block->bbFlags |= BBF_IMPORTED;
+ }
+ return block;
+ }
+
+ //------------------------------------------------------------------------
+ // GetFixedFptrAddress: clear fat pointer bit from fat pointer address.
+ //
+ // Return Value:
+        //    the address with the fat pointer bit cleared.
+ GenTreePtr GetFixedFptrAddress()
+ {
+ GenTreePtr fptrAddressCopy = compiler->gtCloneExpr(fptrAddress);
+ GenTreePtr fatPointerMask = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, FAT_POINTER_MASK);
+ return compiler->gtNewOperNode(GT_XOR, pointerType, fptrAddressCopy, fatPointerMask);
+ }
+
+ //------------------------------------------------------------------------
+ // GetHiddenArgument: load hidden argument.
+ //
+ // Arguments:
+ // fixedFptrAddress - pointer to the tuple <methodPointer, instantiationArgumentPointer>
+ //
+ // Return Value:
+ // loaded hidden argument.
+ GenTreePtr GetHiddenArgument(GenTreePtr fixedFptrAddress)
+ {
+ GenTreePtr fixedFptrAddressCopy = compiler->gtCloneExpr(fixedFptrAddress);
+ GenTreePtr wordSize = new (compiler, GT_CNS_INT) GenTreeIntCon(TYP_I_IMPL, genTypeSize(TYP_I_IMPL));
+ GenTreePtr hiddenArgumentPtrPtr =
+ compiler->gtNewOperNode(GT_ADD, pointerType, fixedFptrAddressCopy, wordSize);
+ GenTreePtr hiddenArgumentPtr = compiler->gtNewOperNode(GT_IND, pointerType, hiddenArgumentPtrPtr);
+ return compiler->gtNewOperNode(GT_IND, fixedFptrAddressCopy->TypeGet(), hiddenArgumentPtr);
+ }
+
+ //------------------------------------------------------------------------
+ // CreateFatCallStmt: create call with fixed call address and hidden argument in the args list.
+ //
+ // Arguments:
+ // actualCallAddress - fixed call address
+ // hiddenArgument - loaded hidden argument
+ //
+ // Return Value:
+        //    the created call statement.
+ GenTreeStmt* CreateFatCallStmt(GenTreePtr actualCallAddress, GenTreePtr hiddenArgument)
+ {
+ GenTreeStmt* fatStmt = compiler->gtCloneExpr(stmt)->AsStmt();
+ GenTreePtr fatTree = fatStmt->gtStmtExpr;
+ GenTreeCall* fatCall = GetCall(fatStmt);
+ fatCall->gtCallAddr = actualCallAddress;
+ GenTreeArgList* args = fatCall->gtCallArgs;
+ args = compiler->gtNewListNode(hiddenArgument, args);
+ fatCall->gtCallArgs = args;
+ return fatStmt;
+ }
+
+ //------------------------------------------------------------------------
+ // RemoveOldStatement: remove original stmt from current block.
+ //
+ void RemoveOldStatement()
+ {
+ compiler->fgRemoveStmt(currBlock, stmt);
+ }
+
+ //------------------------------------------------------------------------
+ // SetWeights: set weights for new blocks.
+ //
+ void SetWeights()
+ {
+ remainderBlock->inheritWeight(currBlock);
+ checkBlock->inheritWeight(currBlock);
+ thenBlock->inheritWeightPercentage(currBlock, HIGH_PROBABILITY);
+ elseBlock->inheritWeightPercentage(currBlock, 100 - HIGH_PROBABILITY);
+ }
+
+ //------------------------------------------------------------------------
+ // ChainFlow: link new blocks into correct cfg.
+ //
+ void ChainFlow()
+ {
+ assert(!compiler->fgComputePredsDone);
+ checkBlock->bbJumpDest = elseBlock;
+ thenBlock->bbJumpDest = remainderBlock;
+ }
+
+ Compiler* compiler;
+ BasicBlock* currBlock;
+ BasicBlock* remainderBlock;
+ BasicBlock* checkBlock;
+ BasicBlock* thenBlock;
+ BasicBlock* elseBlock;
+ GenTreeStmt* stmt;
+ GenTreeCall* origCall;
+ GenTreePtr fptrAddress;
+ var_types pointerType;
+ bool doesReturnValue;
+
+ const int FAT_POINTER_MASK = 0x2;
+ const int HIGH_PROBABILITY = 80;
+ };
+
+ Compiler* compiler;
+};
+
+#ifdef DEBUG
+
+//------------------------------------------------------------------------
+// fgDebugCheckFatPointerCandidates: callback to make sure there are no more GTF_CALL_M_FAT_POINTER_CHECK calls.
+//
+Compiler::fgWalkResult Compiler::fgDebugCheckFatPointerCandidates(GenTreePtr* pTree, fgWalkData* data)
+{
+ GenTreePtr tree = *pTree;
+ if (tree->IsCall())
+ {
+ assert(!tree->AsCall()->IsFatPointerCandidate());
+ }
+ return WALK_CONTINUE;
+}
+
+//------------------------------------------------------------------------
+// CheckNoFatPointerCandidatesLeft: walk through blocks and check that there are no fat pointer candidates left.
+//
+void Compiler::CheckNoFatPointerCandidatesLeft()
+{
+ assert(!doesMethodHaveFatPointer());
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+        for (GenTreeStmt* stmt = block->firstStmt(); stmt != nullptr; stmt = stmt->gtNextStmt)
+ {
+ fgWalkTreePre(&stmt->gtStmtExpr, fgDebugCheckFatPointerCandidates);
+ }
+ }
+}
+#endif
+
+//------------------------------------------------------------------------
+// fgTransformFatCalli: find and transform fat calls.
+//
+void Compiler::fgTransformFatCalli()
+{
+ assert(IsTargetAbi(CORINFO_CORERT_ABI));
+ FatCalliTransformer fatCalliTransformer(this);
+ fatCalliTransformer.Run();
+ clearMethodHasFatPointer();
+#ifdef DEBUG
+ CheckNoFatPointerCandidatesLeft();
+#endif
+}
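
For readers unfamiliar with the fat-pointer convention, the following standalone C++ sketch mimics what the check/then/else blocks produced by FatCalliTransformer compute at runtime. It is an illustration only, not JIT or CoreRT code: the FatPointerTuple struct and CallPossiblyFatPointer function are hypothetical names, while the mask value and the two loads mirror GetFixedFptrAddress and GetHiddenArgument above.

#include <cstdint>

// Hypothetical layout of the tuple that a fat pointer (bit 0x2 set) refers to.
struct FatPointerTuple
{
    void*  methodPointer;            // read by the else block via IND(fixedFptrAddress)
    void** instantiationArgumentPtr; // read via IND(fixedFptrAddress + sizeof(void*))
};

static const uintptr_t FAT_POINTER_MASK = 0x2; // the bit tested in the check block

typedef int (*PlainFn)(int);
typedef int (*FatFn)(void* hiddenArg, int);

// Mirrors the generated control flow: a regular call when the bit is clear ("then"),
// otherwise strip the bit, load the hidden argument, and prepend it to the argument
// list before calling through the real method pointer ("else").
int CallPossiblyFatPointer(uintptr_t fptr, int arg)
{
    if ((fptr & FAT_POINTER_MASK) == 0)
    {
        return ((PlainFn)fptr)(arg);
    }

    FatPointerTuple* tuple     = (FatPointerTuple*)(fptr ^ FAT_POINTER_MASK);
    void*            hiddenArg = *tuple->instantiationArgumentPtr;
    return ((FatFn)tuple->methodPointer)(hiddenArg, arg);
}
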
diff --git a/src/jit/gcencode.cpp b/src/jit/gcencode.cpp
index 128fc4addb..dcca19ebe8 100644
--- a/src/jit/gcencode.cpp
+++ b/src/jit/gcencode.cpp
@@ -3778,8 +3778,10 @@ void GCInfo::gcInfoBlockHdrSave(GcInfoEncoder* gcInfoEncoder, unsigned methodSiz
}
#endif // FEATURE_EH_FUNCLETS
+#if FEATURE_FIXED_OUT_ARGS
// outgoing stack area size
gcInfoEncoderWithLog->SetSizeOfStackOutgoingAndScratchArea(compiler->lvaOutgoingArgSpaceSize);
+#endif // FEATURE_FIXED_OUT_ARGS
#if DISPLAY_SIZES
@@ -3941,13 +3943,6 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
// If we haven't continued to the next variable, we should report this as an untracked local.
CLANG_FORMAT_COMMENT_ANCHOR;
-#if DOUBLE_ALIGN
- // For genDoubleAlign(), locals are addressed relative to ESP and
- // arguments are addressed relative to EBP.
-
- if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
- offset += compiler->codeGen->genTotalFrameSize();
-#endif
GcSlotFlags flags = GC_SLOT_UNTRACKED;
if (varDsc->TypeGet() == TYP_BYREF)
@@ -3998,7 +3993,7 @@ void GCInfo::gcMakeRegPtrTable(GcInfoEncoder* gcInfoEncoder,
// For genDoubleAlign(), locals are addressed relative to ESP and
// arguments are addressed relative to EBP.
- if (genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
+ if (compiler->genDoubleAlign() && varDsc->lvIsParam && !varDsc->lvIsRegArg)
offset += compiler->codeGen->genTotalFrameSize();
#endif
GcSlotFlags flags = GC_SLOT_UNTRACKED;
diff --git a/src/jit/gcinfo.cpp b/src/jit/gcinfo.cpp
index b64fd0a174..e2f76f3b13 100644
--- a/src/jit/gcinfo.cpp
+++ b/src/jit/gcinfo.cpp
@@ -265,6 +265,12 @@ GCInfo::WriteBarrierForm GCInfo::gcIsWriteBarrierCandidate(GenTreePtr tgt, GenTr
case GT_STOREIND:
#endif // !LEGACY_BACKEND
case GT_IND: /* Could be the managed heap */
+ if (tgt->TypeGet() == TYP_BYREF)
+ {
+                // Byref values cannot be in the managed heap.
+ // This case occurs for Span<T>.
+ return WBF_NoBarrier;
+ }
return gcWriteBarrierFormFromTargetAddress(tgt->gtOp.gtOp1);
case GT_LEA:
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 4a6cc740c6..7af500f877 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -678,7 +678,7 @@ Compiler::fgWalkResult Compiler::fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData*
if (kind & GTK_SMPOP)
{
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
if (tree->gtOp.gtOp1 != nullptr)
{
@@ -847,12 +847,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreePreRec(GenTreePtr* pTree, fgWalkData*
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
if (result == WALK_ABORT)
{
return result;
}
- result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ result = fgWalkTreePreRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
if (result == WALK_ABORT)
{
return result;
@@ -1102,12 +1102,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreePostRec(GenTreePtr* pTree, fgWalkData
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
if (result == WALK_ABORT)
{
return result;
}
- result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ result = fgWalkTreePostRec<computeStack>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
if (result == WALK_ABORT)
{
return result;
@@ -1301,7 +1301,7 @@ Compiler::fgWalkResult Compiler::fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fg
}
}
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtOp.gtOp2, fgWalkData);
if (result == WALK_ABORT)
@@ -1446,12 +1446,12 @@ Compiler::fgWalkResult Compiler::fgWalkTreeRec(GenTreePtr* pTree, fgWalkData* fg
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtIndex, fgWalkData);
if (result == WALK_ABORT)
{
return result;
}
- result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtIndex, fgWalkData);
+ result = fgWalkTreeRec<doPreOrder, doPostOrder>(&tree->gtBoundsChk.gtArrLen, fgWalkData);
if (result == WALK_ABORT)
{
return result;
@@ -2378,8 +2378,8 @@ AGAIN:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- return Compare(op1->gtBoundsChk.gtArrLen, op2->gtBoundsChk.gtArrLen) &&
- Compare(op1->gtBoundsChk.gtIndex, op2->gtBoundsChk.gtIndex) &&
+ return Compare(op1->gtBoundsChk.gtIndex, op2->gtBoundsChk.gtIndex) &&
+ Compare(op1->gtBoundsChk.gtArrLen, op2->gtBoundsChk.gtArrLen) &&
(op1->gtBoundsChk.gtThrowKind == op2->gtBoundsChk.gtThrowKind);
case GT_STORE_DYN_BLK:
@@ -2447,7 +2447,7 @@ AGAIN:
if (kind & GTK_SMPOP)
{
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
if (gtHasRef(tree->gtOp.gtOp1, lclNum, defOnly))
{
@@ -2604,11 +2604,11 @@ AGAIN:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- if (gtHasRef(tree->gtBoundsChk.gtArrLen, lclNum, defOnly))
+ if (gtHasRef(tree->gtBoundsChk.gtIndex, lclNum, defOnly))
{
return true;
}
- if (gtHasRef(tree->gtBoundsChk.gtIndex, lclNum, defOnly))
+ if (gtHasRef(tree->gtBoundsChk.gtArrLen, lclNum, defOnly))
{
return true;
}
@@ -2686,6 +2686,8 @@ bool Compiler::gtHasLocalsWithAddrOp(GenTreePtr tree)
return desc.hasAddrTakenLcl;
}
+#ifdef DEBUG
+
/*****************************************************************************
*
* Helper used to compute hash values for trees.
@@ -2701,11 +2703,6 @@ inline unsigned genTreeHashAdd(unsigned old, void* add)
return genTreeHashAdd(old, (unsigned)(size_t)add);
}
-inline unsigned genTreeHashAdd(unsigned old, unsigned add1, unsigned add2)
-{
- return (old + old / 2) ^ add1 ^ add2;
-}
-
/*****************************************************************************
*
* Given an arbitrary expression tree, compute a hash value for it.
@@ -2900,18 +2897,6 @@ AGAIN:
unsigned hsh1 = gtHashValue(op1);
- /* Special case: addition of two values */
-
- if (GenTree::OperIsCommutative(oper))
- {
- unsigned hsh2 = gtHashValue(op2);
-
- /* Produce a hash that allows swapping the operands */
-
- hash = genTreeHashAdd(hash, hsh1, hsh2);
- goto DONE;
- }
-
/* Add op1's hash to the running value and continue with op2 */
hash = genTreeHashAdd(hash, hsh1);
@@ -3001,8 +2986,8 @@ AGAIN:
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtArrLen));
hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtIndex));
+ hash = genTreeHashAdd(hash, gtHashValue(tree->gtBoundsChk.gtArrLen));
hash = genTreeHashAdd(hash, tree->gtBoundsChk.gtThrowKind);
break;
@@ -3027,6 +3012,8 @@ DONE:
return hash;
}
+#endif // DEBUG
+
/*****************************************************************************
*
* Given an arbitrary expression tree, attempts to find the set of all local variables
@@ -3194,7 +3181,7 @@ AGAIN:
}
}
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
/* It's a binary operator */
if (!lvaLclVarRefsAccum(tree->gtOp.gtOp1, findPtr, refsPtr, &allVars, &trkdVars))
@@ -3265,12 +3252,12 @@ AGAIN:
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
{
- if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtArrLen, findPtr, refsPtr, &allVars, &trkdVars))
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
{
return false;
}
// Otherwise...
- if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtIndex, findPtr, refsPtr, &allVars, &trkdVars))
+ if (!lvaLclVarRefsAccum(tree->gtBoundsChk.gtArrLen, findPtr, refsPtr, &allVars, &trkdVars))
{
return false;
}
@@ -3375,6 +3362,10 @@ genTreeOps GenTree::ReverseRelop(genTreeOps relop)
GT_GT, // GT_LE
GT_LT, // GT_GE
GT_LE, // GT_GT
+#ifndef LEGACY_BACKEND
+ GT_TEST_NE, // GT_TEST_EQ
+ GT_TEST_EQ, // GT_TEST_NE
+#endif
};
assert(reverseOps[GT_EQ - GT_EQ] == GT_NE);
@@ -3385,6 +3376,11 @@ genTreeOps GenTree::ReverseRelop(genTreeOps relop)
assert(reverseOps[GT_GE - GT_EQ] == GT_LT);
assert(reverseOps[GT_GT - GT_EQ] == GT_LE);
+#ifndef LEGACY_BACKEND
+ assert(reverseOps[GT_TEST_EQ - GT_EQ] == GT_TEST_NE);
+ assert(reverseOps[GT_TEST_NE - GT_EQ] == GT_TEST_EQ);
+#endif
+
assert(OperIsCompare(relop));
assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(reverseOps));
@@ -3406,6 +3402,10 @@ genTreeOps GenTree::SwapRelop(genTreeOps relop)
GT_GE, // GT_LE
GT_LE, // GT_GE
GT_LT, // GT_GT
+#ifndef LEGACY_BACKEND
+ GT_TEST_EQ, // GT_TEST_EQ
+ GT_TEST_NE, // GT_TEST_NE
+#endif
};
assert(swapOps[GT_EQ - GT_EQ] == GT_EQ);
@@ -3416,6 +3416,11 @@ genTreeOps GenTree::SwapRelop(genTreeOps relop)
assert(swapOps[GT_GE - GT_EQ] == GT_LE);
assert(swapOps[GT_GT - GT_EQ] == GT_LT);
+#ifndef LEGACY_BACKEND
+ assert(swapOps[GT_TEST_EQ - GT_EQ] == GT_TEST_EQ);
+ assert(swapOps[GT_TEST_NE - GT_EQ] == GT_TEST_NE);
+#endif
+
assert(OperIsCompare(relop));
assert(relop >= GT_EQ && (unsigned)(relop - GT_EQ) < sizeof(swapOps));
@@ -4146,7 +4151,7 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
unsigned lvl2; // scratch variable
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
costEx = 0;
costSz = 0;
@@ -5622,17 +5627,17 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costEx = 4; // cmp reg,reg and jae throw (not taken)
costSz = 7; // jump to cold section
- level = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen);
- costEx += tree->gtBoundsChk.gtArrLen->gtCostEx;
- costSz += tree->gtBoundsChk.gtArrLen->gtCostSz;
+ level = gtSetEvalOrder(tree->gtBoundsChk.gtIndex);
+ costEx += tree->gtBoundsChk.gtIndex->gtCostEx;
+ costSz += tree->gtBoundsChk.gtIndex->gtCostSz;
- lvl2 = gtSetEvalOrder(tree->gtBoundsChk.gtIndex);
+ lvl2 = gtSetEvalOrder(tree->gtBoundsChk.gtArrLen);
if (level < lvl2)
{
level = lvl2;
}
- costEx += tree->gtBoundsChk.gtIndex->gtCostEx;
- costSz += tree->gtBoundsChk.gtIndex->gtCostSz;
+ costEx += tree->gtBoundsChk.gtArrLen->gtCostEx;
+ costSz += tree->gtBoundsChk.gtArrLen->gtCostSz;
break;
@@ -5761,7 +5766,7 @@ void Compiler::gtComputeFPlvls(GenTreePtr tree)
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
/* Check for some special cases */
@@ -5954,16 +5959,16 @@ void Compiler::gtComputeFPlvls(GenTreePtr tree)
break;
case GT_ARR_BOUNDS_CHECK:
- gtComputeFPlvls(tree->gtBoundsChk.gtArrLen);
gtComputeFPlvls(tree->gtBoundsChk.gtIndex);
+ gtComputeFPlvls(tree->gtBoundsChk.gtArrLen);
noway_assert(!isflt);
break;
-#ifdef DEBUG
default:
+#ifdef DEBUG
noway_assert(!"Unhandled special operator in gtComputeFPlvls()");
- break;
#endif
+ break;
}
DONE:
@@ -6134,14 +6139,14 @@ GenTreePtr* GenTree::gtGetChildPointer(GenTreePtr parent)
#ifdef FEATURE_SIMD
case GT_SIMD_CHK:
#endif // FEATURE_SIMD
- if (this == parent->gtBoundsChk.gtArrLen)
- {
- return &(parent->gtBoundsChk.gtArrLen);
- }
if (this == parent->gtBoundsChk.gtIndex)
{
return &(parent->gtBoundsChk.gtIndex);
}
+ if (this == parent->gtBoundsChk.gtArrLen)
+ {
+ return &(parent->gtBoundsChk.gtArrLen);
+ }
if (this == parent->gtBoundsChk.gtIndRngFailBB)
{
return &(parent->gtBoundsChk.gtIndRngFailBB);
@@ -6787,6 +6792,57 @@ GenTreePtr Compiler::gtNewOneConNode(var_types type)
}
}
+#ifdef FEATURE_SIMD
+//---------------------------------------------------------------------
+// gtNewSIMDVectorZero: create a GT_SIMD node for Vector<T>.Zero
+//
+// Arguments:
+// simdType - simd vector type
+// baseType - element type of vector
+// size - size of vector in bytes
+GenTreePtr Compiler::gtNewSIMDVectorZero(var_types simdType, var_types baseType, unsigned size)
+{
+ baseType = genActualType(baseType);
+ GenTree* initVal = gtNewZeroConNode(baseType);
+ initVal->gtType = baseType;
+ return gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+}
+
+//---------------------------------------------------------------------
+// gtNewSIMDVectorOne: create a GT_SIMD node for Vector<T>.One
+//
+// Arguments:
+// simdType - simd vector type
+// baseType - element type of vector
+// size - size of vector in bytes
+GenTreePtr Compiler::gtNewSIMDVectorOne(var_types simdType, var_types baseType, unsigned size)
+{
+ GenTree* initVal;
+ if (varTypeIsSmallInt(baseType))
+ {
+ unsigned baseSize = genTypeSize(baseType);
+ int val;
+ if (baseSize == 1)
+ {
+ val = 0x01010101;
+ }
+ else
+ {
+ val = 0x00010001;
+ }
+ initVal = gtNewIconNode(val);
+ }
+ else
+ {
+ initVal = gtNewOneConNode(baseType);
+ }
+
+ baseType = genActualType(baseType);
+ initVal->gtType = baseType;
+ return gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
+}
+#endif // FEATURE_SIMD
+
GenTreeCall* Compiler::gtNewIndCallNode(GenTreePtr addr, var_types type, GenTreeArgList* args, IL_OFFSETX ilOffset)
{
return gtNewCallNode(CT_INDIRECT, (CORINFO_METHOD_HANDLE)addr, type, args, ilOffset);
@@ -7525,9 +7581,7 @@ void Compiler::gtBlockOpInit(GenTreePtr result, GenTreePtr dst, GenTreePtr srcOr
if (dst->OperIsLocal() && varTypeIsStruct(dst))
{
- unsigned lclNum = dst->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ setLclRelatedToSIMDIntrinsic(dst);
}
}
}
@@ -8031,7 +8085,7 @@ GenTreePtr Compiler::gtCloneExpr(
case GT_SIMD:
{
GenTreeSIMD* simdOp = tree->AsSIMD();
- copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2(),
+ copy = gtNewSIMDNode(simdOp->TypeGet(), simdOp->gtGetOp1(), simdOp->gtGetOp2IfPresent(),
simdOp->gtSIMDIntrinsicID, simdOp->gtSIMDBaseType, simdOp->gtSIMDSize);
}
break;
@@ -8079,7 +8133,7 @@ GenTreePtr Compiler::gtCloneExpr(
}
}
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
copy->gtOp.gtOp2 = gtCloneExpr(tree->gtOp.gtOp2, addFlags, deepVarNum, deepVarVal);
}
@@ -8130,7 +8184,7 @@ GenTreePtr Compiler::gtCloneExpr(
{
copy->gtFlags |= (copy->gtOp.gtOp1->gtFlags & GTF_ALL_EFFECT);
}
- if (copy->gtGetOp2() != nullptr)
+ if (copy->gtGetOp2IfPresent() != nullptr)
{
copy->gtFlags |= (copy->gtGetOp2()->gtFlags & GTF_ALL_EFFECT);
}
@@ -8290,8 +8344,8 @@ GenTreePtr Compiler::gtCloneExpr(
#endif // FEATURE_SIMD
copy = new (this, oper)
GenTreeBoundsChk(oper, tree->TypeGet(),
- gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, deepVarNum, deepVarVal),
gtCloneExpr(tree->gtBoundsChk.gtIndex, addFlags, deepVarNum, deepVarVal),
+ gtCloneExpr(tree->gtBoundsChk.gtArrLen, addFlags, deepVarNum, deepVarVal),
tree->gtBoundsChk.gtThrowKind);
break;
@@ -9000,9 +9054,9 @@ GenTreePtr GenTree::GetChild(unsigned childNum)
switch (childNum)
{
case 0:
- return AsBoundsChk()->gtArrLen;
- case 1:
return AsBoundsChk()->gtIndex;
+ case 1:
+ return AsBoundsChk()->gtArrLen;
default:
unreached();
}
@@ -9176,9 +9230,9 @@ GenTree** GenTreeUseEdgeIterator::GetNextUseEdge() const
switch (m_state)
{
case 0:
- return &m_node->AsBoundsChk()->gtArrLen;
- case 1:
return &m_node->AsBoundsChk()->gtIndex;
+ case 1:
+ return &m_node->AsBoundsChk()->gtArrLen;
default:
return nullptr;
}
@@ -10208,6 +10262,10 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
case GT_LE:
case GT_GE:
case GT_GT:
+#ifndef LEGACY_BACKEND
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
+#endif
if (tree->gtFlags & GTF_RELOP_NAN_UN)
{
printf("N");
@@ -10226,12 +10284,6 @@ void Compiler::gtDispNode(GenTreePtr tree, IndentStack* indentStack, __in __in_z
--msgLength;
break;
}
- if (tree->gtFlags & GTF_RELOP_SMALL)
- {
- printf("S");
- --msgLength;
- break;
- }
goto DASH;
default:
@@ -10694,7 +10746,17 @@ void Compiler::gtDispConst(GenTree* tree)
case GT_CNS_INT:
if (tree->IsIconHandle(GTF_ICON_STR_HDL))
{
- printf(" 0x%X \"%S\"", dspPtr(tree->gtIntCon.gtIconVal), eeGetCPString(tree->gtIntCon.gtIconVal));
+ const wchar_t* str = eeGetCPString(tree->gtIntCon.gtIconVal);
+ if (str != nullptr)
+ {
+ printf(" 0x%X \"%S\"", dspPtr(tree->gtIntCon.gtIconVal), str);
+ }
+ else
+ {
+                // Note that eeGetCPString isn't currently implemented on Linux/ARM
+                // and instead always returns nullptr.
+ printf(" 0x%X [ICON_STR_HDL]", dspPtr(tree->gtIntCon.gtIconVal));
+ }
}
else
{
@@ -11255,7 +11317,7 @@ void Compiler::gtDispTree(GenTreePtr tree,
{
if (!topOnly)
{
- if (tree->gtGetOp2())
+ if (tree->gtGetOp2IfPresent())
{
// Label the childMsgs of the GT_COLON operator
// op2 is the then part
@@ -11670,8 +11732,8 @@ void Compiler::gtDispTree(GenTreePtr tree,
printf("\n");
if (!topOnly)
{
- gtDispChild(tree->gtBoundsChk.gtArrLen, indentStack, IIArc, nullptr, topOnly);
- gtDispChild(tree->gtBoundsChk.gtIndex, indentStack, IIArcBottom, nullptr, topOnly);
+ gtDispChild(tree->gtBoundsChk.gtIndex, indentStack, IIArc, nullptr, topOnly);
+ gtDispChild(tree->gtBoundsChk.gtArrLen, indentStack, IIArcBottom, nullptr, topOnly);
}
break;
@@ -11955,6 +12017,10 @@ void Compiler::gtDispLIRNode(GenTree* node)
// 49 spaces for alignment
printf("%-49s", "");
+#ifdef FEATURE_SET_FLAGS
+ // additional flag enlarges the flag field by one character
+ printf(" ");
+#endif
indentStack.Push(operandArc);
indentStack.print();
@@ -12615,7 +12681,7 @@ GenTreePtr Compiler::gtFoldExprConst(GenTreePtr tree)
assert(kind & (GTK_UNOP | GTK_BINOP));
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
if (!opts.OptEnabled(CLFLG_CONSTANTFOLD))
{
@@ -14411,12 +14477,14 @@ GenTreePtr Compiler::gtBuildCommaList(GenTreePtr list, GenTreePtr expr)
result->gtFlags |= (list->gtFlags & GTF_ALL_EFFECT);
result->gtFlags |= (expr->gtFlags & GTF_ALL_EFFECT);
- // 'list' and 'expr' should have valuenumbers defined for both or for neither one
- noway_assert(list->gtVNPair.BothDefined() == expr->gtVNPair.BothDefined());
+ // 'list' and 'expr' should have valuenumbers defined for both or for neither one (unless we are remorphing,
+ // in which case a prior transform involving either node may have discarded or otherwise invalidated the value
+ // numbers).
+ assert((list->gtVNPair.BothDefined() == expr->gtVNPair.BothDefined()) || !fgGlobalMorph);
// Set the ValueNumber 'gtVNPair' for the new GT_COMMA node
//
- if (expr->gtVNPair.BothDefined())
+ if (list->gtVNPair.BothDefined() && expr->gtVNPair.BothDefined())
{
// The result of a GT_COMMA node is op2, the normal value number is op2vnp
// But we also need to include the union of side effects from op1 and op2.
@@ -14505,7 +14573,7 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = expr->gtOp.gtOp1;
- GenTreePtr op2 = expr->gtGetOp2();
+ GenTreePtr op2 = expr->gtGetOp2IfPresent();
if (flags & GTF_EXCEPT)
{
@@ -14589,8 +14657,8 @@ void Compiler::gtExtractSideEffList(GenTreePtr expr,
#endif // FEATURE_SIMD
)
{
- gtExtractSideEffList(expr->AsBoundsChk()->gtArrLen, pList, flags);
gtExtractSideEffList(expr->AsBoundsChk()->gtIndex, pList, flags);
+ gtExtractSideEffList(expr->AsBoundsChk()->gtArrLen, pList, flags);
}
if (expr->OperGet() == GT_DYN_BLK || expr->OperGet() == GT_STORE_DYN_BLK)
@@ -15046,7 +15114,6 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
{
VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::MakeEmpty(this));
- VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::MakeEmpty(this));
@@ -15055,20 +15122,22 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind)
{
VarSetOps::AssignNoCopy(this, block->bbVarUse, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbVarDef, VarSetOps::UninitVal());
- VarSetOps::AssignNoCopy(this, block->bbVarTmp, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbLiveOut, VarSetOps::UninitVal());
VarSetOps::AssignNoCopy(this, block->bbScope, VarSetOps::UninitVal());
}
- block->bbHeapUse = false;
- block->bbHeapDef = false;
- block->bbHeapLiveIn = false;
- block->bbHeapLiveOut = false;
+ block->bbMemoryUse = emptyMemoryKindSet;
+ block->bbMemoryDef = emptyMemoryKindSet;
+ block->bbMemoryLiveIn = emptyMemoryKindSet;
+ block->bbMemoryLiveOut = emptyMemoryKindSet;
- block->bbHeapSsaPhiFunc = nullptr;
- block->bbHeapSsaNumIn = 0;
- block->bbHeapSsaNumOut = 0;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ block->bbMemorySsaPhiFunc[memoryKind] = nullptr;
+ block->bbMemorySsaNumIn[memoryKind] = 0;
+ block->bbMemorySsaNumOut[memoryKind] = 0;
+ }
// Make sure we reserve a NOT_IN_LOOP value that isn't a legal table index.
static_assert_no_msg(MAX_LOOP_NUM < BasicBlock::NOT_IN_LOOP);
@@ -15717,18 +15786,21 @@ unsigned GenTree::IsLclVarUpdateTree(GenTree** pOtherTree, genTreeOps* pOper)
return lclNum;
}
-// return true if this tree node is a subcomponent of parent for codegen purposes
-// (essentially, will be rolled into the same instruction)
-// Note that this method relies upon the value of gtRegNum field to determine
-// if the treenode is contained or not. Therefore you can not call this method
-// until after the LSRA phase has allocated physical registers to the treenodes.
+//------------------------------------------------------------------------
+// isContained: check whether this tree node is a subcomponent of its parent for codegen purposes
+//
+// Return Value:
+// Returns true if there is no code generated explicitly for this node.
+// Essentially, it will be rolled into the code generation for the parent.
+//
+// Assumptions:
+// This method relies upon the value of gtRegNum field to determine whether the tree node
+// is contained.
+// Therefore you can not call this method until after the LSRA phase has allocated physical
+// registers to the treenodes.
+//
bool GenTree::isContained() const
{
- if (isContainedSpillTemp())
- {
- return true;
- }
-
if (gtHasReg())
{
return false;
@@ -15747,7 +15819,6 @@ bool GenTree::isContained() const
return false;
}
- // TODO-Cleanup : this is not clean, would be nice to have some way of marking this.
switch (OperGet())
{
case GT_STOREIND:
@@ -16253,8 +16324,15 @@ CORINFO_CLASS_HANDLE Compiler::gtGetStructHandleIfPresent(GenTree* tree)
case GT_ASG:
structHnd = gtGetStructHandleIfPresent(tree->gtGetOp1());
break;
- case GT_LCL_VAR:
case GT_LCL_FLD:
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(tree))
+ {
+ structHnd = gtGetStructHandleForSIMD(tree->gtType, TYP_FLOAT);
+ }
+#endif
+ break;
+ case GT_LCL_VAR:
structHnd = lvaTable[tree->AsLclVarCommon()->gtLclNum].lvVerTypeInfo.GetClassHandle();
break;
case GT_RETURN:
@@ -16792,15 +16870,8 @@ bool FieldSeqNode::IsPseudoField()
GenTreeSIMD* Compiler::gtNewSIMDNode(
var_types type, GenTreePtr op1, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
{
- // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR))), in which case it should be
- // marked lvUsedInSIMDIntrinsic.
assert(op1 != nullptr);
- if (op1->OperGet() == GT_LCL_VAR)
- {
- unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
- }
+ SetOpLclRelatedToSIMDIntrinsic(op1);
return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
}
@@ -16808,24 +16879,34 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
GenTreeSIMD* Compiler::gtNewSIMDNode(
var_types type, GenTreePtr op1, GenTreePtr op2, SIMDIntrinsicID simdIntrinsicID, var_types baseType, unsigned size)
{
- // TODO-CQ: An operand may be a GT_OBJ(GT_ADDR(GT_LCL_VAR))), in which case it should be
- // marked lvUsedInSIMDIntrinsic.
assert(op1 != nullptr);
- if (op1->OperIsLocal())
+ SetOpLclRelatedToSIMDIntrinsic(op1);
+ if (op2 != nullptr)
{
- unsigned lclNum = op1->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ SetOpLclRelatedToSIMDIntrinsic(op2);
}
- if (op2 != nullptr && op2->OperIsLocal())
+ return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+}
+
+//-------------------------------------------------------------------
+// SetOpLclRelatedToSIMDIntrinsic: Determine if the tree has a local var that needs to be set
+// as used by a SIMD intrinsic, and if so, set that local var appropriately.
+//
+// Arguments:
+//    op - The tree to check; it will be an operand of a new GT_SIMD node.
+//
+void Compiler::SetOpLclRelatedToSIMDIntrinsic(GenTreePtr op)
+{
+ if (op->OperIsLocal())
{
- unsigned lclNum = op2->AsLclVarCommon()->GetLclNum();
- LclVarDsc* lclVarDsc = &lvaTable[lclNum];
- lclVarDsc->lvUsedInSIMDIntrinsic = true;
+ setLclRelatedToSIMDIntrinsic(op);
+ }
+ else if ((op->OperGet() == GT_OBJ) && (op->gtOp.gtOp1->OperGet() == GT_ADDR) &&
+ op->gtOp.gtOp1->gtOp.gtOp1->OperIsLocal())
+ {
+ setLclRelatedToSIMDIntrinsic(op->gtOp.gtOp1->gtOp.gtOp1);
}
-
- return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
}
bool GenTree::isCommutativeSIMDIntrinsic()
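
As a quick sanity check on the constants chosen by gtNewSIMDVectorOne above: for 1- and 2-byte element types the 32-bit initializer packs a 1 into every narrow lane, so broadcasting it yields an all-ones vector. A standalone snippet (not JIT code) verifying the lane values:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main()
{
    const int32_t byteOnes  = 0x01010101; // used when the element size is 1 byte
    const int32_t shortOnes = 0x00010001; // used when the element size is 2 bytes

    // Reinterpret the 32-bit constants as narrow lanes and print each lane.
    uint8_t lanes8[4];
    std::memcpy(lanes8, &byteOnes, sizeof(lanes8));
    std::printf("%d %d %d %d\n", lanes8[0], lanes8[1], lanes8[2], lanes8[3]); // 1 1 1 1

    uint16_t lanes16[2];
    std::memcpy(lanes16, &shortOnes, sizeof(lanes16));
    std::printf("%d %d\n", lanes16[0], lanes16[1]); // 1 1
    return 0;
}
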
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 4611d35465..0ea8321e77 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -566,7 +566,7 @@ public:
bool isContainedIntOrIImmed() const
{
- return isContained() && IsCnsIntOrI() && !isContainedSpillTemp();
+ return isContained() && IsCnsIntOrI() && !isUsedFromSpillTemp();
}
bool isContainedFltOrDblImmed() const
@@ -579,28 +579,34 @@ public:
return OperGet() == GT_LCL_FLD || OperGet() == GT_STORE_LCL_FLD;
}
- bool isContainedLclField() const
+ bool isUsedFromSpillTemp() const;
+
+ // Indicates whether it is a memory op.
+ // Right now it includes Indir and LclField ops.
+ bool isMemoryOp() const
{
- return isContained() && isLclField();
+ return isIndir() || isLclField();
}
- bool isContainedLclVar() const
+ bool isUsedFromMemory() const
{
- return isContained() && (OperGet() == GT_LCL_VAR);
+ return ((isContained() && (isMemoryOp() || (OperGet() == GT_LCL_VAR) || (OperGet() == GT_CNS_DBL))) ||
+ isUsedFromSpillTemp());
}
- bool isContainedSpillTemp() const;
+ bool isLclVarUsedFromMemory() const
+ {
+ return (OperGet() == GT_LCL_VAR) && (isContained() || isUsedFromSpillTemp());
+ }
- // Indicates whether it is a memory op.
- // Right now it includes Indir and LclField ops.
- bool isMemoryOp() const
+ bool isLclFldUsedFromMemory() const
{
- return isIndir() || isLclField();
+ return isLclField() && (isContained() || isUsedFromSpillTemp());
}
- bool isContainedMemoryOp() const
+ bool isUsedFromReg() const
{
- return (isContained() && isMemoryOp()) || isContainedLclVar() || isContainedSpillTemp();
+ return !isContained() && !isUsedFromSpillTemp();
}
regNumber GetRegNum() const
@@ -903,8 +909,6 @@ public:
#define GTF_RELOP_NAN_UN 0x80000000 // GT_<relop> -- Is branch taken if ops are NaN?
#define GTF_RELOP_JMP_USED 0x40000000 // GT_<relop> -- result of compare used for jump or ?:
#define GTF_RELOP_QMARK 0x20000000 // GT_<relop> -- the node is the condition for ?:
-#define GTF_RELOP_SMALL 0x10000000 // GT_<relop> -- We should use a byte or short sized compare (op1->gtType
- // is the small type)
#define GTF_RELOP_ZTT 0x08000000 // GT_<relop> -- Loop test cloned for converting while-loops into do-while
// with explicit "loop test" in the header block.
@@ -1073,6 +1077,17 @@ public:
}
}
+ bool OperIs(genTreeOps oper)
+ {
+ return OperGet() == oper;
+ }
+
+ template <typename... T>
+ bool OperIs(genTreeOps oper, T... rest)
+ {
+ return OperIs(oper) || OperIs(rest...);
+ }
+
static bool OperIsConst(genTreeOps gtOper)
{
return (OperKind(gtOper) & GTK_CONST) != 0;
@@ -1588,8 +1603,14 @@ public:
inline GenTreePtr gtGetOp1();
+ // Directly return op2. Asserts the node is binary. Might return nullptr if the binary node allows
+ // a nullptr op2, such as GT_LIST. This is more efficient than gtGetOp2IfPresent() if you know what
+ // node type you have.
inline GenTreePtr gtGetOp2();
+ // The returned pointer might be nullptr if the node is not binary, or if non-null op2 is not required.
+ inline GenTreePtr gtGetOp2IfPresent();
+
// Given a tree node, if this is a child of that node, return the pointer to the child node so that it
// can be modified; otherwise, return null.
GenTreePtr* gtGetChildPointer(GenTreePtr parent);
@@ -3248,43 +3269,52 @@ struct GenTreeCall final : public GenTree
#endif
}
-#define GTF_CALL_M_EXPLICIT_TAILCALL \
- 0x0001 // GT_CALL -- the call is "tail" prefixed and importer has performed tail call checks
-#define GTF_CALL_M_TAILCALL 0x0002 // GT_CALL -- the call is a tailcall
-#define GTF_CALL_M_VARARGS 0x0004 // GT_CALL -- the call uses varargs ABI
-#define GTF_CALL_M_RETBUFFARG 0x0008 // GT_CALL -- first parameter is the return buffer argument
-#define GTF_CALL_M_DELEGATE_INV 0x0010 // GT_CALL -- call to Delegate.Invoke
-#define GTF_CALL_M_NOGCCHECK 0x0020 // GT_CALL -- not a call for computing full interruptability
-#define GTF_CALL_M_SPECIAL_INTRINSIC 0x0040 // GT_CALL -- function that could be optimized as an intrinsic
- // in special cases. Used to optimize fast way out in morphing
-#define GTF_CALL_M_UNMGD_THISCALL \
- 0x0080 // "this" pointer (first argument) should be enregistered (only for GTF_CALL_UNMANAGED)
-#define GTF_CALL_M_VIRTSTUB_REL_INDIRECT \
- 0x0080 // the virtstub is indirected through a relative address (only for GTF_CALL_VIRT_STUB)
-#define GTF_CALL_M_NONVIRT_SAME_THIS \
- 0x0080 // callee "this" pointer is equal to caller this pointer (only for GTF_CALL_NONVIRT)
-#define GTF_CALL_M_FRAME_VAR_DEATH 0x0100 // GT_CALL -- the compLvFrameListRoot variable dies here (last use)
+// clang-format off
+
+#define GTF_CALL_M_EXPLICIT_TAILCALL 0x00000001 // GT_CALL -- the call is "tail" prefixed and
+ // importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL 0x00000002 // GT_CALL -- the call is a tailcall
+#define GTF_CALL_M_VARARGS 0x00000004 // GT_CALL -- the call uses varargs ABI
+#define GTF_CALL_M_RETBUFFARG 0x00000008 // GT_CALL -- first parameter is the return buffer argument
+#define GTF_CALL_M_DELEGATE_INV 0x00000010 // GT_CALL -- call to Delegate.Invoke
+#define GTF_CALL_M_NOGCCHECK 0x00000020 // GT_CALL -- not a call for computing full interruptability
+#define GTF_CALL_M_SPECIAL_INTRINSIC 0x00000040 // GT_CALL -- function that could be optimized as an intrinsic
+ // in special cases. Used to optimize fast way out in morphing
+#define GTF_CALL_M_UNMGD_THISCALL 0x00000080 // GT_CALL -- "this" pointer (first argument)
+ // should be enregistered (only for GTF_CALL_UNMANAGED)
+#define GTF_CALL_M_VIRTSTUB_REL_INDIRECT 0x00000080 // the virtstub is indirected through
+ // a relative address (only for GTF_CALL_VIRT_STUB)
+#define GTF_CALL_M_NONVIRT_SAME_THIS 0x00000080 // GT_CALL -- callee "this" pointer is
+ // equal to caller this pointer (only for GTF_CALL_NONVIRT)
+#define GTF_CALL_M_FRAME_VAR_DEATH 0x00000100 // GT_CALL -- the compLvFrameListRoot variable dies here (last use)
#ifndef LEGACY_BACKEND
-#define GTF_CALL_M_TAILCALL_VIA_HELPER 0x0200 // GT_CALL -- call is a tail call dispatched via tail call JIT helper.
-#endif // !LEGACY_BACKEND
+#define GTF_CALL_M_TAILCALL_VIA_HELPER 0x00000200 // GT_CALL -- call is a tail call dispatched via tail call JIT helper.
+#endif
#if FEATURE_TAILCALL_OPT
-#define GTF_CALL_M_IMPLICIT_TAILCALL \
- 0x0400 // GT_CALL -- call is an opportunistic tail call and importer has performed tail call checks
-#define GTF_CALL_M_TAILCALL_TO_LOOP \
- 0x0800 // GT_CALL -- call is a fast recursive tail call that can be converted into a loop
+#define GTF_CALL_M_IMPLICIT_TAILCALL 0x00000400 // GT_CALL -- call is an opportunistic
+ // tail call and importer has performed tail call checks
+#define GTF_CALL_M_TAILCALL_TO_LOOP 0x00000800 // GT_CALL -- call is a fast recursive tail call
+ // that can be converted into a loop
#endif
-#define GTF_CALL_M_PINVOKE 0x1000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE.
- // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g.
- // an IL Stub dynamically generated for a PInvoke declaration is flagged as
- // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
- // know when these flags are set.
+#define GTF_CALL_M_PINVOKE 0x00001000 // GT_CALL -- call is a pinvoke. This mirrors VM flag CORINFO_FLG_PINVOKE.
+ // A call marked as Pinvoke is not necessarily a GT_CALL_UNMANAGED. For e.g.
+ // an IL Stub dynamically generated for a PInvoke declaration is flagged as
+ // a Pinvoke but not as an unmanaged call. See impCheckForPInvokeCall() to
+ // know when these flags are set.
+
+#define GTF_CALL_M_R2R_REL_INDIRECT 0x00002000 // GT_CALL -- ready to run call is indirected through a relative address
+#define GTF_CALL_M_DOES_NOT_RETURN 0x00004000 // GT_CALL -- call does not return
+#define GTF_CALL_M_SECURE_DELEGATE_INV 0x00008000 // GT_CALL -- call is in secure delegate
+#define GTF_CALL_M_FAT_POINTER_CHECK 0x00010000 // GT_CALL -- CoreRT managed calli needs transformation, that checks
+ // special bit in calli address. If it is set, then it is necessary
+ // to restore real function address and load hidden argument
+ // as the first argument for calli. It is CoreRT replacement for instantiating
+ // stubs, because executable code cannot be generated at runtime.
-#define GTF_CALL_M_R2R_REL_INDIRECT 0x2000 // GT_CALL -- ready to run call is indirected through a relative address
-#define GTF_CALL_M_DOES_NOT_RETURN 0x4000 // GT_CALL -- call does not return
-#define GTF_CALL_M_SECURE_DELEGATE_INV 0x8000 // GT_CALL -- call is in secure delegate
+ // clang-format on
bool IsUnmanaged() const
{
@@ -3482,9 +3512,24 @@ struct GenTreeCall final : public GenTree
return (gtCallMoreFlags & GTF_CALL_M_DOES_NOT_RETURN) != 0;
}
+ bool IsFatPointerCandidate() const
+ {
+ return (gtCallMoreFlags & GTF_CALL_M_FAT_POINTER_CHECK) != 0;
+ }
+
bool IsPure(Compiler* compiler) const;
- unsigned short gtCallMoreFlags; // in addition to gtFlags
+ void ClearFatPointerCandidate()
+ {
+ gtCallMoreFlags &= ~GTF_CALL_M_FAT_POINTER_CHECK;
+ }
+
+ void SetFatPointerCandidate()
+ {
+ gtCallMoreFlags |= GTF_CALL_M_FAT_POINTER_CHECK;
+ }
+
+ unsigned gtCallMoreFlags; // in addition to gtFlags
unsigned char gtCallType : 3; // value from the gtCallTypes enumeration
unsigned char gtReturnType : 5; // exact return type
@@ -3764,8 +3809,8 @@ public:
struct GenTreeBoundsChk : public GenTree
{
- GenTreePtr gtArrLen; // An expression for the length of the array being indexed.
GenTreePtr gtIndex; // The index expression.
+ GenTreePtr gtArrLen; // An expression for the length of the array being indexed.
GenTreePtr gtIndRngFailBB; // Label to jump to for array-index-out-of-range
SpecialCodeKind gtThrowKind; // Kind of throw block to branch to on failure
@@ -3775,10 +3820,10 @@ struct GenTreeBoundsChk : public GenTree
optimizer has a chance of eliminating some of the rng checks */
unsigned gtStkDepth;
- GenTreeBoundsChk(genTreeOps oper, var_types type, GenTreePtr arrLen, GenTreePtr index, SpecialCodeKind kind)
+ GenTreeBoundsChk(genTreeOps oper, var_types type, GenTreePtr index, GenTreePtr arrLen, SpecialCodeKind kind)
: GenTree(oper, type)
- , gtArrLen(arrLen)
, gtIndex(index)
+ , gtArrLen(arrLen)
, gtIndRngFailBB(nullptr)
, gtThrowKind(kind)
, gtStkDepth(0)
@@ -4531,6 +4576,9 @@ struct GenTreePhiArg : public GenTreeLclVarCommon
struct GenTreePutArgStk : public GenTreeUnOp
{
unsigned gtSlotNum; // Slot number of the argument to be passed on stack
+#if defined(UNIX_X86_ABI)
+ unsigned gtPadAlign; // Number of padding slots for stack alignment
+#endif
#if FEATURE_FASTTAILCALL
bool putInIncomingArgArea; // Whether this arg needs to be placed in incoming arg area.
@@ -4546,6 +4594,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
@@ -4567,6 +4618,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
, putInIncomingArgArea(_putInIncomingArgArea)
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
@@ -4588,6 +4642,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
@@ -4607,6 +4664,9 @@ struct GenTreePutArgStk : public GenTreeUnOp
DEBUGARG(GenTreePtr callNode = NULL) DEBUGARG(bool largeNode = false))
: GenTreeUnOp(oper, type, op1 DEBUGARG(largeNode))
, gtSlotNum(slotNum)
+#if defined(UNIX_X86_ABI)
+ , gtPadAlign(0)
+#endif
#ifdef FEATURE_PUT_STRUCT_ARG_STK
, gtPutArgStkKind(Kind::Invalid)
, gtNumSlots(numSlots)
@@ -4625,6 +4685,18 @@ struct GenTreePutArgStk : public GenTreeUnOp
return gtSlotNum * TARGET_POINTER_SIZE;
}
+#if defined(UNIX_X86_ABI)
+ unsigned getArgPadding()
+ {
+ return gtPadAlign;
+ }
+
+ void setArgPadding(unsigned padAlign)
+ {
+ gtPadAlign = padAlign;
+ }
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
unsigned getArgSize()
{
@@ -4968,7 +5040,7 @@ inline bool GenTree::IsIntegralConstVector(ssize_t constVal)
if ((gtOper == GT_SIMD) && (gtSIMD.gtSIMDIntrinsicID == SIMDIntrinsicInit) && gtGetOp1()->IsIntegralConst(constVal))
{
assert(varTypeIsIntegral(gtSIMD.gtSIMDBaseType));
- assert(gtGetOp2() == nullptr);
+ assert(gtGetOp2IfPresent() == nullptr);
return true;
}
#endif
@@ -5149,12 +5221,24 @@ inline bool GenTree::RequiresNonNullOp2(genTreeOps oper)
inline GenTreePtr GenTree::gtGetOp2()
{
+ assert(OperIsBinary());
+
+ GenTreePtr op2 = gtOp.gtOp2;
+
+ // Only allow null op2 if the node type allows it, e.g. GT_LIST.
+ assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper));
+
+ return op2;
+}
+
+inline GenTreePtr GenTree::gtGetOp2IfPresent()
+{
/* gtOp.gtOp2 is only valid for GTK_BINOP nodes. */
GenTreePtr op2 = OperIsBinary() ? gtOp.gtOp2 : nullptr;
// This documents the genTreeOps for which gtOp.gtOp2 cannot be nullptr.
- // This helps prefix in its analyis of code which calls gtGetOp2()
+ // This helps prefix in its analysis of code which calls gtGetOp2()
assert((op2 != nullptr) || !RequiresNonNullOp2(gtOper));
@@ -5319,10 +5403,10 @@ inline bool GenTreeBlk::HasGCPtr()
return false;
}
-inline bool GenTree::isContainedSpillTemp() const
+inline bool GenTree::isUsedFromSpillTemp() const
{
#if !defined(LEGACY_BACKEND)
- // If spilled and no reg at use, then it is treated as contained.
+ // If spilled and no reg at use, then it is used from the spill temp location rather than being reloaded.
if (((gtFlags & GTF_SPILLED) != 0) && ((gtFlags & GTF_NOREG_AT_USE) != 0))
{
return true;
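
The variadic OperIs helper added above replaces chains of OperGet() comparisons with a single call. A standalone analogue (a stripped-down Node stand-in, not the real GenTree) showing how the overload recursion expands:

#include <cstdio>

enum genTreeOps { GT_ADD, GT_SUB, GT_MUL, GT_CALL };

struct Node
{
    genTreeOps gtOper;

    bool OperIs(genTreeOps oper) const
    {
        return gtOper == oper;
    }

    // Each extra argument peels off one comparison, as in the header above.
    template <typename... T>
    bool OperIs(genTreeOps oper, T... rest) const
    {
        return OperIs(oper) || OperIs(rest...);
    }
};

int main()
{
    Node n{GT_SUB};
    // Equivalent to (n.gtOper == GT_ADD) || (n.gtOper == GT_SUB) || (n.gtOper == GT_MUL).
    std::printf("%d\n", n.OperIs(GT_ADD, GT_SUB, GT_MUL)); // prints 1
    return 0;
}
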
diff --git a/src/jit/gschecks.cpp b/src/jit/gschecks.cpp
index 9255d8fd36..e4f1c25e0f 100644
--- a/src/jit/gschecks.cpp
+++ b/src/jit/gschecks.cpp
@@ -409,7 +409,8 @@ void Compiler::gsParamsToShadows()
lvaTable[shadowVar].lvUsedInSIMDIntrinsic = varDsc->lvUsedInSIMDIntrinsic;
if (varDsc->lvSIMDType)
{
- lvaTable[shadowVar].lvBaseType = varDsc->lvBaseType;
+ lvaTable[shadowVar].lvExactSize = varDsc->lvExactSize;
+ lvaTable[shadowVar].lvBaseType = varDsc->lvBaseType;
}
#endif
lvaTable[shadowVar].lvRegStruct = varDsc->lvRegStruct;
diff --git a/src/jit/gtlist.h b/src/jit/gtlist.h
index 92265a7359..2d9255b6ce 100644
--- a/src/jit/gtlist.h
+++ b/src/jit/gtlist.h
@@ -145,6 +145,17 @@ GTNODE(LT , "<" ,GenTreeOp ,0,GTK_BINOP|GTK_RE
GTNODE(LE , "<=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
GTNODE(GE , ">=" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
GTNODE(GT , ">" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+#ifndef LEGACY_BACKEND
+// These are similar to GT_EQ/GT_NE but they generate "test" instead of "cmp" instructions.
+// Currently these are generated during lowering for code like ((x & y) eq|ne 0) only on
+// XArch, but ARM could also use them for the same purpose since it has a "tst" instruction.
+// Note that the general case of comparing a register against 0 is handled directly by
+// codegen, which emits a "test reg, reg" instruction; doing that during lowering would be
+// more difficult because the source operand is used twice, so it would have to be a lclvar.
+// Because of this there is no need to also add GT_TEST_LT/LE/GE/GT opers.
+GTNODE(TEST_EQ , "testEQ" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+GTNODE(TEST_NE , "testNE" ,GenTreeOp ,0,GTK_BINOP|GTK_RELOP)
+#endif
GTNODE(COMMA , "comma" ,GenTreeOp ,0,GTK_BINOP|GTK_NOTLIR)
@@ -269,7 +280,7 @@ GTNODE(EMITNOP , "emitnop" ,GenTree ,0,GTK_LEAF|GTK_NOV
GTNODE(PINVOKE_PROLOG ,"pinvoke_prolog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke prolog seq
GTNODE(PINVOKE_EPILOG ,"pinvoke_epilog",GenTree ,0,GTK_LEAF|GTK_NOVALUE) // pinvoke epilog seq
GTNODE(PUTARG_REG , "putarg_reg" ,GenTreeOp ,0,GTK_UNOP) // operator that places outgoing arg in register
-GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP) // operator that places outgoing arg in stack
+GTNODE(PUTARG_STK , "putarg_stk" ,GenTreePutArgStk ,0,GTK_UNOP|GTK_NOVALUE) // operator that places outgoing arg in stack
GTNODE(RETURNTRAP , "returnTrap" ,GenTreeOp ,0,GTK_UNOP|GTK_NOVALUE) // a conditional call to wait on gc
GTNODE(SWAP , "swap" ,GenTreeOp ,0,GTK_BINOP|GTK_NOVALUE) // op1 and op2 swap (registers)
GTNODE(IL_OFFSET , "il_offset" ,GenTreeStmt ,0,GTK_LEAF|GTK_NOVALUE) // marks an IL offset for debugging purposes
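
The TEST_EQ/TEST_NE comment above rests on the fact that an x86 "test" instruction sets ZF exactly as if (x & y) had been compared against zero, so GT_EQ(GT_AND(x, y), 0) and GT_TEST_EQ(x, y) are interchangeable. A small self-contained check of that equivalence (plain C++, not JIT code):

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // Models the zero flag produced by "test x, y": ZF = ((x & y) == 0).
    static bool TestSetsZeroFlag(uint32_t x, uint32_t y)
    {
        return (x & y) == 0;
    }

    // Lowering rewrites: GT_EQ(GT_AND(x, y), 0) -> GT_TEST_EQ(x, y)
    //                    GT_NE(GT_AND(x, y), 0) -> GT_TEST_NE(x, y)
    // Both shapes must agree for all inputs; spot-check a few values here.
    int main()
    {
        const uint32_t samples[] = {0u, 1u, 0x80000000u, 0xFFFFFFFFu, 0x00FF00FFu};
        for (uint32_t x : samples)
        {
            for (uint32_t y : samples)
            {
                bool eqForm   = ((x & y) == 0);         // original tree shape
                bool testForm = TestSetsZeroFlag(x, y); // lowered TEST_EQ shape
                assert(eqForm == testForm);
            }
        }
        printf("TEST_EQ pattern matches (x & y) == 0 on all samples\n");
        return 0;
    }
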
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index cb09ff8b8c..b1e0f487ef 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1489,17 +1489,16 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
const DWORD structFlags = info.compCompHnd->getClassAttribs(structHnd);
var_types structType = TYP_STRUCT;
-#ifdef FEATURE_CORECLR
- const bool hasGCPtrs = (structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0;
-#else
- // Desktop CLR won't report FLG_CONTAINS_GC_PTR for RefAnyClass - need to check explicitly.
- const bool isRefAny = (structHnd == impGetRefAnyClass());
- const bool hasGCPtrs = isRefAny || ((structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0);
-#endif
+ // On CoreCLR the check for GC pointers includes a "may" to account for the special
+ // ByRef-like structs such as Span<T>. The added CORINFO_FLG_CONTAINS_STACK_PTR check is
+ // the relevant bit: when it is set, the struct contains a ByRef that could be either a
+ // GC pointer or a native pointer.
+ const bool mayContainGCPtrs =
+ ((structFlags & CORINFO_FLG_CONTAINS_STACK_PTR) != 0 || ((structFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0));
#ifdef FEATURE_SIMD
// Check to see if this is a SIMD type.
- if (featureSIMD && !hasGCPtrs)
+ if (featureSIMD && !mayContainGCPtrs)
{
unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
@@ -1515,10 +1514,8 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
{
*pSimdBaseType = simdBaseType;
}
-#ifdef _TARGET_AMD64_
- // Amd64: also indicate that we use floating point registers
+ // Also indicate that we use floating point registers.
compFloatingPointUsed = true;
-#endif
}
}
}
@@ -1532,9 +1529,10 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
// Verify that the quick test up above via the class attributes gave a
// safe view of the type's GCness.
//
- // Note there are cases where hasGCPtrs is true but getClassGClayout
+ // Note there are cases where mayContainGCPtrs is true but getClassGClayout
// does not report any gc fields.
- assert(hasGCPtrs || (numGCVars == 0));
+
+ assert(mayContainGCPtrs || (numGCVars == 0));
if (pNumGCVars != nullptr)
{
@@ -1638,21 +1636,52 @@ GenTreePtr Compiler::impNormStructVal(GenTreePtr structVal,
case GT_COMMA:
{
- // The second thing is the block node.
+ // The second operand could be a block node, a GT_SIMD node, or a GT_COMMA node.
GenTree* blockNode = structVal->gtOp.gtOp2;
assert(blockNode->gtType == structType);
- // It had better be a block node - any others should not occur here.
- assert(blockNode->OperIsBlk());
-
- // Sink the GT_COMMA below the blockNode addr.
- GenTree* blockNodeAddr = blockNode->gtOp.gtOp1;
- assert(blockNodeAddr->gtType == TYP_BYREF);
- GenTree* commaNode = structVal;
- commaNode->gtType = TYP_BYREF;
- commaNode->gtOp.gtOp2 = blockNodeAddr;
- blockNode->gtOp.gtOp1 = commaNode;
- structVal = blockNode;
- alreadyNormalized = true;
+
+ // Is this GT_COMMA(op1, GT_COMMA())?
+ GenTree* parent = structVal;
+ if (blockNode->OperGet() == GT_COMMA)
+ {
+ // Find the last node in the comma chain.
+ do
+ {
+ assert(blockNode->gtType == structType);
+ parent = blockNode;
+ blockNode = blockNode->gtOp.gtOp2;
+ } while (blockNode->OperGet() == GT_COMMA);
+ }
+
+#ifdef FEATURE_SIMD
+ if (blockNode->OperGet() == GT_SIMD)
+ {
+ parent->gtOp.gtOp2 = impNormStructVal(blockNode, structHnd, curLevel, forceNormalization);
+ alreadyNormalized = true;
+ }
+ else
+#endif
+ {
+ assert(blockNode->OperIsBlk());
+
+ // Sink the GT_COMMA below the blockNode addr.
+ // That is, GT_COMMA(op1, op2=blockNode) is transformed into
+ // blockNode(GT_COMMA(TYP_BYREF, op1, op2's op1)).
+ //
+ // In case of a chained GT_COMMA case, we sink the last
+ // GT_COMMA below the blockNode addr.
+ GenTree* blockNodeAddr = blockNode->gtOp.gtOp1;
+ assert(blockNodeAddr->gtType == TYP_BYREF);
+ GenTree* commaNode = parent;
+ commaNode->gtType = TYP_BYREF;
+ commaNode->gtOp.gtOp2 = blockNodeAddr;
+ blockNode->gtOp.gtOp1 = commaNode;
+ if (parent == structVal)
+ {
+ structVal = blockNode;
+ }
+ alreadyNormalized = true;
+ }
}
break;
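
The comma-sinking rewrite above, i.e. GT_COMMA(op1, BLK(addr)) becoming BLK(GT_COMMA(op1, addr)), can be shown on a toy tree. The sketch below models only the single-comma case; as the hunk shows, the importer first walks a chained comma sequence to its last comma and sinks that one. The types here are illustrative, not the JIT's GenTree:

    #include <cassert>
    #include <cstdio>

    // Toy IR: just enough structure to show how a COMMA wrapping a block node is
    // sunk below the block's address operand.
    enum class Op { Comma, Blk, Addr, Value };

    struct Tree
    {
        Op    op;
        Tree* op1 = nullptr; // Comma: side effect; Blk: address
        Tree* op2 = nullptr; // Comma: result
    };

    static Tree* SinkCommaBelowBlk(Tree* comma)
    {
        assert(comma->op == Op::Comma);
        Tree* blockNode = comma->op2;
        assert(blockNode->op == Op::Blk);

        // The comma now produces the (byref) address, and the block consumes it.
        Tree* blockAddr = blockNode->op1;
        comma->op2      = blockAddr;
        blockNode->op1  = comma;

        // The block node becomes the new root of the struct value expression.
        return blockNode;
    }

    int main()
    {
        Tree addr{Op::Addr};
        Tree blk{Op::Blk, &addr};
        Tree sideEffect{Op::Value};
        Tree comma{Op::Comma, &sideEffect, &blk};

        Tree* result = SinkCommaBelowBlk(&comma);
        assert(result == &blk);    // the block node is now on top
        assert(blk.op1 == &comma); // with the comma feeding it the address
        assert(comma.op2 == &addr);
        printf("comma sunk below the block node\n");
        return 0;
    }
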
@@ -3240,7 +3269,8 @@ GenTreePtr Compiler::impInitializeArrayIntrinsic(CORINFO_SIG_INFO* sig)
// Returns the GenTree that should be used to do the intrinsic instead of the call.
// Returns NULL if an intrinsic cannot be used
-GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
+GenTreePtr Compiler::impIntrinsic(GenTreePtr newobjThis,
+ CORINFO_CLASS_HANDLE clsHnd,
CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
int memberRef,
@@ -3252,7 +3282,7 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
#if COR_JIT_EE_VERSION > 460
CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method, &mustExpand);
#else
- CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
+ CorInfoIntrinsics intrinsicID = info.compCompHnd->getIntrinsicID(method);
#endif
*pIntrinsicID = intrinsicID;
@@ -3576,7 +3606,33 @@ GenTreePtr Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
retNode = op1;
break;
#endif
-
+ // Implement ByReference Ctor. This wraps the assignment of the ref into a byref-like field
+ // in a value type. The canonical example of this is Span<T>. In effect this is just a
+ // substitution. The parameter byref will be assigned into the newly allocated object.
+ case CORINFO_INTRINSIC_ByReference_Ctor:
+ {
+ // Remove call to constructor and directly assign the byref passed
+ // to the call to the first slot of the ByReference struct.
+ op1 = impPopStack().val;
+ GenTreePtr thisptr = newobjThis;
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
+ GenTreePtr field = gtNewFieldRef(TYP_BYREF, fldHnd, thisptr, 0, false);
+ GenTreePtr assign = gtNewAssignNode(field, op1);
+ GenTreePtr byReferenceStruct = gtCloneExpr(thisptr->gtGetOp1());
+ assert(byReferenceStruct != nullptr);
+ impPushOnStack(byReferenceStruct, typeInfo(TI_STRUCT, clsHnd));
+ retNode = assign;
+ break;
+ }
+ // Implement ptr value getter for ByReference struct.
+ case CORINFO_INTRINSIC_ByReference_Value:
+ {
+ op1 = impPopStack().val;
+ CORINFO_FIELD_HANDLE fldHnd = info.compCompHnd->getFieldInClass(clsHnd, 0);
+ GenTreePtr field = gtNewFieldRef(TYP_BYREF, fldHnd, op1, 0, false);
+ retNode = field;
+ break;
+ }
default:
/* Unknown intrinsic */
break;
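
As a rough analogue of the two intrinsic expansions above: the ByReference<T> constructor collapses to a store into the struct's single byref-like field, and the Value getter collapses to a load of that field, so no call is emitted. The sketch below is plain C++ with made-up names (ByReferenceLike, field0); it is not the managed type or the JIT's expansion code:

    #include <cassert>
    #include <cstdio>

    struct ByReferenceLike
    {
        int* field0; // the single byref slot at offset 0
    };

    // CORINFO_INTRINSIC_ByReference_Ctor: "new ByReference(ref value)" becomes
    // a direct assignment into the first (and only) field.
    static void ByRefCtor(ByReferenceLike* thisPtr, int* value)
    {
        thisPtr->field0 = value;
    }

    // CORINFO_INTRINSIC_ByReference_Value: the getter becomes a direct field load.
    static int* ByRefValue(const ByReferenceLike* thisPtr)
    {
        return thisPtr->field0;
    }

    int main()
    {
        int             x = 42;
        ByReferenceLike br;
        ByRefCtor(&br, &x);            // ctor collapses to a field store
        assert(ByRefValue(&br) == &x); // getter collapses to a field load
        printf("*Value() = %d\n", *ByRefValue(&br));
        return 0;
    }
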
@@ -5359,29 +5415,23 @@ GenTreePtr Compiler::impTransformThis(GenTreePtr thisPtr,
}
//------------------------------------------------------------------------
-// impCanPInvokeInline: examine information from a call to see if the call
-// qualifies as an inline pinvoke.
-//
-// Arguments:
-// block - block contaning the call, or for inlinees, block
-// containing the call being inlined
+// impCanPInvokeInline: check whether PInvoke inlining should be enabled in the current method.
//
// Return Value:
-// true if this call qualifies as an inline pinvoke, false otherwise
+// true if PInvoke inlining should be enabled in the current method, false otherwise
//
// Notes:
-// Checks basic legality and then a number of ambient conditions
-// where we could pinvoke but choose not to
+// Checks a number of ambient conditions where we could pinvoke but choose not to
-bool Compiler::impCanPInvokeInline(BasicBlock* block)
+bool Compiler::impCanPInvokeInline()
{
- return impCanPInvokeInlineCallSite(block) && getInlinePInvokeEnabled() && (!opts.compDbgCode) &&
- (compCodeOpt() != SMALL_CODE) && (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
+ return getInlinePInvokeEnabled() && (!opts.compDbgCode) && (compCodeOpt() != SMALL_CODE) &&
+ (!opts.compNoPInvokeInlineCB) // profiler is preventing inline pinvoke
;
}
//------------------------------------------------------------------------
-// impCanPInvokeInlineSallSite: basic legality checks using information
+// impCanPInvokeInlineCallSite: basic legality checks using information
// from a call to see if the call qualifies as an inline pinvoke.
//
// Arguments:
@@ -5410,6 +5460,17 @@ bool Compiler::impCanPInvokeInline(BasicBlock* block)
bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
{
+ if (block->hasHndIndex())
+ {
+ return false;
+ }
+
+ // The remaining limitations do not apply to CoreRT
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ return true;
+ }
+
#ifdef _TARGET_AMD64_
// On x64, we disable pinvoke inlining inside of try regions.
// Here is the comment from JIT64 explaining why:
@@ -5431,12 +5492,13 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block)
//
// A desktop test case where this seems to matter is
// jit\jit64\ebvts\mcpp\sources2\ijw\__clrcall\vector_ctor_dtor.02\deldtor_clr.exe
- const bool inX64Try = block->hasTryIndex();
-#else
- const bool inX64Try = false;
+ if (block->hasTryIndex())
+ {
+ return false;
+ }
#endif // _TARGET_AMD64_
- return !inX64Try && !block->hasHndIndex();
+ return true;
}
//------------------------------------------------------------------------
@@ -5502,27 +5564,38 @@ void Compiler::impCheckForPInvokeCall(
}
optNativeCallCount++;
- if (opts.compMustInlinePInvokeCalli && methHnd == nullptr)
+ if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && methHnd == nullptr)
{
- // Always inline pinvoke.
+ // PInvoke CALLI in IL stubs must be inlined
}
else
{
- // Check legality and profitability.
- if (!impCanPInvokeInline(block))
+ // Check legality
+ if (!impCanPInvokeInlineCallSite(block))
{
return;
}
- if (info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig))
+ // PInvoke CALL in IL stubs must be inlined on CoreRT. Skip the ambient conditions checks and
+ // profitability checks
+ if (!(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && IsTargetAbi(CORINFO_CORERT_ABI)))
{
- return;
+ if (!impCanPInvokeInline())
+ {
+ return;
+ }
+
+ // Size-speed tradeoff: don't use inline pinvoke at rarely
+ // executed call sites. The non-inline version is more
+ // compact.
+ if (block->isRunRarely())
+ {
+ return;
+ }
}
- // Size-speed tradeoff: don't use inline pinvoke at rarely
- // executed call sites. The non-inline version is more
- // compact.
- if (block->isRunRarely())
+ // The expensive check should be last
+ if (info.compCompHnd->pInvokeMarshalingRequired(methHnd, sig))
{
return;
}
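
Taken together, the reordered checks above amount to the following decision sequence: PInvoke CALLI in an IL stub is always inlined; otherwise the per-call-site legality check runs first, the ambient-condition and rarely-run checks are skipped for IL stubs targeting CoreRT, and the expensive marshaling query runs last. A hedged sketch of that ordering with the queries reduced to plain booleans (not the JIT's code):

    #include <cstdio>

    struct PInvokeSite
    {
        bool isILStub;            // JIT_FLAG_IL_STUB
        bool isCalli;             // methHnd == nullptr
        bool isCoreRT;            // IsTargetAbi(CORINFO_CORERT_ABI)
        bool callSiteIsLegal;     // impCanPInvokeInlineCallSite(block)
        bool ambientConditionsOk; // impCanPInvokeInline()
        bool isRarelyRun;         // block->isRunRarely()
        bool marshalingRequired;  // pInvokeMarshalingRequired(methHnd, sig)
    };

    static bool ShouldInlinePInvoke(const PInvokeSite& s)
    {
        if (s.isILStub && s.isCalli)
        {
            return true; // PInvoke CALLI in IL stubs must be inlined
        }

        if (!s.callSiteIsLegal)
        {
            return false; // basic legality first
        }

        // CoreRT IL stubs skip the ambient and profitability checks.
        if (!(s.isILStub && s.isCoreRT))
        {
            if (!s.ambientConditionsOk || s.isRarelyRun)
            {
                return false;
            }
        }

        // The expensive EE query runs last.
        return !s.marshalingRequired;
    }

    int main()
    {
        PInvokeSite site{false, false, false, true, true, false, false};
        printf("inline? %s\n", ShouldInlinePInvoke(site) ? "yes" : "no");
        return 0;
    }
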
@@ -6189,7 +6262,7 @@ bool Compiler::impIsTailCallILPattern(bool tailPrefixed,
((nextOpcode == CEE_NOP) || ((nextOpcode == CEE_POP) && (++cntPop == 1)))); // Next opcode = nop or exactly
// one pop seen so far.
#else
- nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
+ nextOpcode = (OPCODE)getU1LittleEndian(codeAddrOfNextOpcode);
#endif
if (isCallPopAndRet)
@@ -6359,6 +6432,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
eeGetSig(pResolvedToken->token, info.compScopeHnd, impTokenLookupContextHandle, &calliSig);
callRetTyp = JITtype2varType(calliSig.retType);
+ clsHnd = calliSig.retTypeClass;
call = impImportIndirectCall(&calliSig, ilOffset);
@@ -6387,6 +6461,16 @@ var_types Compiler::impImportCall(OPCODE opcode,
call->gtCall.callSig = new (this, CMK_CorSig) CORINFO_SIG_INFO;
*call->gtCall.callSig = calliSig;
#endif // DEBUG
+
+ if (IsTargetAbi(CORINFO_CORERT_ABI))
+ {
+ bool managedCall = (calliSig.callConv & GTF_CALL_UNMANAGED) == 0;
+ if (managedCall)
+ {
+ call->AsCall()->SetFatPointerCandidate();
+ setMethodHasFatPointer();
+ }
+ }
}
else // (opcode != CEE_CALLI)
{
@@ -6435,7 +6519,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
if (mflags & CORINFO_FLG_DONT_INLINE_CALLER)
{
- compInlineResult->NoteFatal(InlineObservation::CALLEE_STACK_CRAWL_MARK);
+ compInlineResult->NoteFatal(InlineObservation::CALLEE_HAS_NOINLINE_CALLEE);
return callRetTyp;
}
@@ -6490,7 +6574,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
// <NICE> Factor this into getCallInfo </NICE>
if ((mflags & CORINFO_FLG_INTRINSIC) && !pConstrainedResolvedToken)
{
- call = impIntrinsic(clsHnd, methHnd, sig, pResolvedToken->token, readonlyCall,
+ call = impIntrinsic(newobjThis, clsHnd, methHnd, sig, pResolvedToken->token, readonlyCall,
(canTailCall && (tailCall != 0)), &intrinsicID);
if (call != nullptr)
@@ -6533,7 +6617,6 @@ var_types Compiler::impImportCall(OPCODE opcode,
if ((mflags & CORINFO_FLG_VIRTUAL) && (mflags & CORINFO_FLG_EnC) && (opcode == CEE_CALLVIRT))
{
NO_WAY("Virtual call to a function added via EnC is not supported");
- goto DONE_CALL;
}
if ((sig->callConv & CORINFO_CALLCONV_MASK) != CORINFO_CALLCONV_DEFAULT &&
@@ -7469,10 +7552,8 @@ DONE:
}
}
-// Note: we assume that small return types are already normalized by the managed callee
-// or by the pinvoke stub for calls to unmanaged code.
-
-DONE_CALL:
+ // Note: we assume that small return types are already normalized by the managed callee
+ // or by the pinvoke stub for calls to unmanaged code.
if (!bIntrinsicImported)
{
@@ -7517,6 +7598,7 @@ DONE_CALL:
impMarkInlineCandidate(call, exactContextHnd, callInfo);
}
+DONE_CALL:
// Push or append the result of the call
if (callRetTyp == TYP_VOID)
{
@@ -7569,9 +7651,11 @@ DONE_CALL:
}
}
- if (call->gtOper == GT_CALL)
+ if (call->IsCall())
{
// Sometimes "call" is not a GT_CALL (if we imported an intrinsic that didn't turn into a call)
+
+ bool fatPointerCandidate = call->AsCall()->IsFatPointerCandidate();
if (varTypeIsStruct(callRetTyp))
{
call = impFixupCallStructReturn(call, sig->retTypeClass);
@@ -7580,6 +7664,7 @@ DONE_CALL:
if ((call->gtFlags & GTF_CALL_INLINE_CANDIDATE) != 0)
{
assert(opts.OptEnabled(CLFLG_INLINING));
+ assert(!fatPointerCandidate); // We should not try to inline calli.
// Make the call its own tree (spill the stack if needed).
impAppendTree(call, (unsigned)CHECK_SPILL_ALL, impCurStmtOffs);
@@ -7589,6 +7674,24 @@ DONE_CALL:
}
else
{
+ if (fatPointerCandidate)
+ {
+ // fatPointer candidates should be in statements of the form call() or var = call().
+ // This form makes it possible to find statements with fat calls without walking whole
+ // trees, and avoids the problems that come with splitting trees apart.
+ assert(!bIntrinsicImported);
+ assert(IsTargetAbi(CORINFO_CORERT_ABI));
+ if (call->OperGet() != GT_LCL_VAR) // can be already converted by impFixupCallStructReturn.
+ {
+ unsigned calliSlot = lvaGrabTemp(true DEBUGARG("calli"));
+ LclVarDsc* varDsc = &lvaTable[calliSlot];
+ varDsc->lvVerTypeInfo = tiRetVal;
+ impAssignTempGen(calliSlot, call, clsHnd, (unsigned)CHECK_SPILL_NONE);
+ // impAssignTempGen can change src arg list and return type for call that returns struct.
+ var_types type = genActualType(lvaTable[calliSlot].TypeGet());
+ call = gtNewLclvNode(calliSlot, type);
+ }
+ }
// For non-candidates we must also spill, since we
// might have locals live on the eval stack that this
// call can modify.
diff --git a/src/jit/inline.def b/src/jit/inline.def
index ff0b21100e..2a6f5a3f73 100644
--- a/src/jit/inline.def
+++ b/src/jit/inline.def
@@ -39,6 +39,7 @@ INLINE_OBSERVATION(HAS_LEAVE, bool, "has leave",
INLINE_OBSERVATION(HAS_MANAGED_VARARGS, bool, "managed varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NATIVE_VARARGS, bool, "native varargs", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NO_BODY, bool, "has no body", FATAL, CALLEE)
+INLINE_OBSERVATION(HAS_NOINLINE_CALLEE, bool, "in corelib, noinline callee", FATAL, CALLEE)
INLINE_OBSERVATION(HAS_NULL_FOR_LDELEM, bool, "has null pointer for ldelem", FATAL, CALLEE)
INLINE_OBSERVATION(IS_ARRAY_METHOD, bool, "is array method", FATAL, CALLEE)
INLINE_OBSERVATION(IS_GENERIC_VIRTUAL, bool, "generic virtual", FATAL, CALLEE)
@@ -55,7 +56,6 @@ INLINE_OBSERVATION(NEEDS_SECURITY_CHECK, bool, "needs security check",
INLINE_OBSERVATION(NO_METHOD_INFO, bool, "cannot get method info", FATAL, CALLEE)
INLINE_OBSERVATION(NOT_PROFITABLE_INLINE, bool, "unprofitable inline", FATAL, CALLEE)
INLINE_OBSERVATION(RANDOM_REJECT, bool, "random reject", FATAL, CALLEE)
-INLINE_OBSERVATION(STACK_CRAWL_MARK, bool, "uses stack crawl mark", FATAL, CALLEE)
INLINE_OBSERVATION(STFLD_NEEDS_HELPER, bool, "stfld needs helper", FATAL, CALLEE)
INLINE_OBSERVATION(THROW_WITH_INVALID_STACK, bool, "throw with invalid stack", FATAL, CALLEE)
INLINE_OBSERVATION(TOO_MANY_ARGUMENTS, bool, "too many arguments", FATAL, CALLEE)
diff --git a/src/jit/instr.cpp b/src/jit/instr.cpp
index edc4483c6b..7332ba6c71 100644
--- a/src/jit/instr.cpp
+++ b/src/jit/instr.cpp
@@ -3513,6 +3513,12 @@ instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
{
// On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register.
assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
+
+#if !defined(_TARGET_64BIT_)
+ // No 64-bit registers on x86.
+ assert((srcType != TYP_LONG) && (srcType != TYP_ULONG));
+#endif // !defined(_TARGET_64BIT_)
+
return INS_mov_i2xmm;
}
@@ -3520,6 +3526,12 @@ instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
{
// On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register.
assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
+
+#if !defined(_TARGET_64BIT_)
+ // No 64-bit registers on x86.
+ assert((dstType != TYP_LONG) && (dstType != TYP_ULONG));
+#endif // !defined(_TARGET_64BIT_)
+
return INS_mov_xmm2i;
}
diff --git a/src/jit/instrsxarch.h b/src/jit/instrsxarch.h
index 4317334bf2..8ab3a845ba 100644
--- a/src/jit/instrsxarch.h
+++ b/src/jit/instrsxarch.h
@@ -320,6 +320,9 @@ INST3( pcmpgtq, "pcmpgtq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
INST3( pmulld, "pmulld" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x40)) // Packed multiply 32 bit unsigned integers and store lower 32 bits of each result
INST3( ptest, "ptest" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x17)) // Packed logical compare
INST3( phaddd, "phaddd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x02)) // Packed horizontal add
+INST3( pabsb, "pabsb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1C)) // Packed absolute value of bytes
+INST3( pabsw, "pabsw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1D)) // Packed absolute value of 16-bit integers
+INST3( pabsd, "pabsd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1E)) // Packed absolute value of 32-bit integers
INST3(LAST_SSE4_INSTRUCTION, "LAST_SSE4_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
INST3(FIRST_AVX_INSTRUCTION, "FIRST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
diff --git a/src/jit/jit.h b/src/jit/jit.h
index 220294f825..05b154e15a 100644
--- a/src/jit/jit.h
+++ b/src/jit/jit.h
@@ -416,14 +416,6 @@ typedef ptrdiff_t ssize_t;
//=============================================================================
-#define FANCY_ARRAY_OPT 0 // optimize more complex index checks
-
-//=============================================================================
-
-#define LONG_ASG_OPS 0 // implementation isn't complete yet
-
-//=============================================================================
-
#define OPT_MULT_ADDSUB 1 // optimize consecutive "lclVar += or -= icon"
#define OPT_BOOL_OPS 1 // optimize boolean operations
@@ -699,11 +691,7 @@ inline unsigned int unsigned_abs(int x)
#ifdef _TARGET_64BIT_
inline size_t unsigned_abs(ssize_t x)
{
-#ifndef FEATURE_PAL
return ((size_t)abs(x));
-#else // !FEATURE_PAL
- return ((size_t)labs(x));
-#endif // !FEATURE_PAL
}
#endif // _TARGET_64BIT_
diff --git a/src/jit/jit.settings.targets b/src/jit/jit.settings.targets
index 6c0474a00c..8749b80242 100644
--- a/src/jit/jit.settings.targets
+++ b/src/jit/jit.settings.targets
@@ -95,9 +95,11 @@
<ItemGroup Condition="'$(TargetArch)'=='i386'">
<CppCompile Include="..\emitXArch.cpp" />
<CppCompile Include="..\TargetX86.cpp" />
+ <CppCompile Include="..\unwindx86.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\stackfp.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerXArch.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\lsraxarch.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenXArch.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMD.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\SIMDCodeGenXArch.cpp" />
@@ -107,6 +109,7 @@
<CppCompile Include="..\emitXArch.cpp" />
<CppCompile Include="..\TargetAmd64.cpp" />
<CppCompile Include="..\LowerXArch.cpp" />
+ <CppCompile Include="..\lsraxarch.cpp" />
<CppCompile Include="..\CodeGenXArch.cpp" />
<CppCompile Include="..\SIMD.cpp" />
<CppCompile Include="..\SIMDCodeGenXArch.cpp" />
@@ -118,6 +121,7 @@
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='True'" Include="..\registerfp.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\DecomposeLongs.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\LowerArm.cpp" />
+ <CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\lsraarm.cpp" />
<CppCompile Condition="'$(ClDefines.Contains(`LEGACY_BACKEND`))'=='False'" Include="..\CodeGenArm.cpp" />
<CppCompile Include="..\unwindArm.cpp" />
</ItemGroup>
@@ -126,6 +130,7 @@
<CppCompile Include="..\emitarm64.cpp" />
<CppCompile Include="..\TargetArm64.cpp" />
<CppCompile Include="..\LowerArm64.cpp" />
+ <CppCompile Include="..\lsraarm64.cpp" />
<CppCompile Include="..\CodeGenArm64.cpp" />
<CppCompile Include="..\unwindArm.cpp" />
<CppCompile Include="..\unwindArm64.cpp" />
diff --git a/src/jit/jitconfigvalues.h b/src/jit/jitconfigvalues.h
index 39a2505246..4623fe8268 100644
--- a/src/jit/jitconfigvalues.h
+++ b/src/jit/jitconfigvalues.h
@@ -204,13 +204,14 @@ CONFIG_INTEGER(AltJitAssertOnNYI, W("AltJitAssertOnNYI"), 1) // Controls the Alt
CONFIG_INTEGER(EnableSSE3_4, W("EnableSSE3_4"), 1) // Enable SSE3, SSSE3, SSE 4.1 and 4.2 instruction set as default
#endif
-#if defined(_TARGET_AMD64_)
-CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX instruction set for wide operations as default.
-// When both AVX and SSE3_4 are set, we will use the most capable instruction set available
-// which will prefer AVX over SSE3/4.
-#else // !defined(_TARGET_AMD64_)
-CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) // Enable AVX instruction set for wide operations as default
-#endif // defined(_TARGET_AMD64_)
+#if defined(_TARGET_AMD64_) || defined(_TARGET_X86_)
+// Enable AVX instruction set for wide operations as default. When both AVX and SSE3_4 are set, we will use the most
+// capable instruction set available which will prefer AVX over SSE3/4.
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1)
+#else // !defined(_TARGET_AMD64_) && !defined(_TARGET_X86_)
+// Enable AVX instruction set for wide operations as default
+CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0)
+#endif // !defined(_TARGET_AMD64_) && !defined(_TARGET_X86_)
#if !defined(DEBUG) && !defined(_DEBUG)
CONFIG_INTEGER(JitEnableNoWayAssert, W("JitEnableNoWayAssert"), 0)
@@ -274,6 +275,16 @@ CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0)
CONFIG_INTEGER(JitEECallTimingInfo, W("JitEECallTimingInfo"), 0)
+#if defined(DEBUG)
+#if defined(FEATURE_CORECLR)
+CONFIG_INTEGER(JitEnableFinallyCloning, W("JitEnableFinallyCloning"), 1)
+CONFIG_INTEGER(JitEnableRemoveEmptyTry, W("JitEnableRemoveEmptyTry"), 1)
+#else
+CONFIG_INTEGER(JitEnableFinallyCloning, W("JitEnableFinallyCloning"), 0)
+CONFIG_INTEGER(JitEnableRemoveEmptyTry, W("JitEnableRemoveEmptyTry"), 0)
+#endif // defined(FEATURE_CORECLR)
+#endif // DEBUG
+
#undef CONFIG_INTEGER
#undef CONFIG_STRING
#undef CONFIG_METHODSET
diff --git a/src/jit/jiteh.cpp b/src/jit/jiteh.cpp
index 4b3ceaecf6..2d0eee366f 100644
--- a/src/jit/jiteh.cpp
+++ b/src/jit/jiteh.cpp
@@ -93,7 +93,7 @@ bool EHblkDsc::HasFinallyHandler()
bool EHblkDsc::HasFaultHandler()
{
- return ebdHandlerType == EH_HANDLER_FAULT;
+ return (ebdHandlerType == EH_HANDLER_FAULT) || (ebdHandlerType == EH_HANDLER_FAULT_WAS_FINALLY);
}
bool EHblkDsc::HasFinallyOrFaultHandler()
@@ -2426,6 +2426,11 @@ bool Compiler::fgNormalizeEHCase2()
// this once per dup.
fgReplaceJumpTarget(predBlock, newTryStart, insertBeforeBlk);
+ // Need to adjust ref counts here since we're retargeting edges.
+ newTryStart->bbRefs++;
+ assert(insertBeforeBlk->countOfInEdges() > 0);
+ insertBeforeBlk->bbRefs--;
+
#ifdef DEBUG
if (verbose)
{
diff --git a/src/jit/jiteh.h b/src/jit/jiteh.h
index 573116282c..502d2153c2 100644
--- a/src/jit/jiteh.h
+++ b/src/jit/jiteh.h
@@ -27,7 +27,8 @@ enum EHHandlerType
EH_HANDLER_CATCH = 0x1, // Don't use zero (to aid debugging uninitialized memory)
EH_HANDLER_FILTER,
EH_HANDLER_FAULT,
- EH_HANDLER_FINALLY
+ EH_HANDLER_FINALLY,
+ EH_HANDLER_FAULT_WAS_FINALLY
};
// ToCORINFO_EH_CLAUSE_FLAGS: Convert an internal EHHandlerType to a CORINFO_EH_CLAUSE_FLAGS value
@@ -41,6 +42,7 @@ inline CORINFO_EH_CLAUSE_FLAGS ToCORINFO_EH_CLAUSE_FLAGS(EHHandlerType type)
case EH_HANDLER_FILTER:
return CORINFO_EH_CLAUSE_FILTER;
case EH_HANDLER_FAULT:
+ case EH_HANDLER_FAULT_WAS_FINALLY:
return CORINFO_EH_CLAUSE_FAULT;
case EH_HANDLER_FINALLY:
return CORINFO_EH_CLAUSE_FINALLY;
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index ea9c573a02..b4e4cc6e55 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -465,7 +465,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
varDsc->lvArgReg = genMapIntRegArgNumToRegNum(retBuffArgNum);
}
-#if FEATURE_MULTIREG__ARGS
+#if FEATURE_MULTIREG_ARGS
varDsc->lvOtherArgReg = REG_NA;
#endif
varDsc->setPrefReg(varDsc->lvArgReg, this);
@@ -488,6 +488,16 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
varDsc->lvType = TYP_I_IMPL;
}
}
+#ifdef FEATURE_SIMD
+ else if (featureSIMD && varTypeIsSIMD(info.compRetType))
+ {
+ varDsc->lvSIMDType = true;
+ varDsc->lvBaseType =
+ getBaseTypeAndSizeOfSIMDType(info.compMethodInfo->args.retTypeClass, &varDsc->lvExactSize);
+ assert(varDsc->lvBaseType != TYP_UNKNOWN);
+ }
+#endif // FEATURE_SIMD
+
assert(isValidIntArgReg(varDsc->lvArgReg));
#ifdef DEBUG
@@ -1059,7 +1069,7 @@ void Compiler::lvaInitVarArgsHandle(InitVarDscInfo* varDscInfo)
varDsc->lvIsRegArg = 1;
varDsc->lvArgReg = genMapRegArgNumToRegNum(varArgHndArgNum, TYP_I_IMPL);
-#if FEATURE_MULTIREG__ARGS
+#if FEATURE_MULTIREG_ARGS
varDsc->lvOtherArgReg = REG_NA;
#endif
varDsc->setPrefReg(varDsc->lvArgReg, this);
@@ -1414,9 +1424,16 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
if (typeHnd != StructPromotionInfo->typeHnd)
{
- // sizeof(double) represents the size of the largest primitive type that we can struct promote
- // In the future this may be changing to XMM_REGSIZE_BYTES
- const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double); // must be a compile time constant
+ // sizeof(double) represents the size of the largest primitive type that we can struct promote.
+ // In the future this may be changing to XMM_REGSIZE_BYTES.
+ // Note: MaxOffset is used below to declare a local array, and therefore must be a compile-time constant.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#ifdef FEATURE_SIMD
+ // This will allow promotion of 2 Vector<T> fields on AVX2, or 4 Vector<T> fields on SSE2.
+ const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * XMM_REGSIZE_BYTES;
+#else // !FEATURE_SIMD
+ const int MaxOffset = MAX_NumOfFieldsInPromotableStruct * sizeof(double);
+#endif // !FEATURE_SIMD
assert((BYTE)MaxOffset == MaxOffset); // because lvaStructFieldInfo.fldOffset is byte-sized
assert((BYTE)MAX_NumOfFieldsInPromotableStruct ==
@@ -1507,13 +1524,31 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
CorInfoType corType = info.compCompHnd->getFieldType(pFieldInfo->fldHnd, &pFieldInfo->fldTypeHnd);
var_types varType = JITtype2varType(corType);
pFieldInfo->fldType = varType;
- pFieldInfo->fldSize = genTypeSize(varType);
+ unsigned size = genTypeSize(varType);
+ pFieldInfo->fldSize = size;
if (varTypeIsGC(varType))
{
containsGCpointers = true;
}
+#ifdef FEATURE_SIMD
+ // Check to see if this is a SIMD type.
+ // We will only check this if we have already found a SIMD type, which will be true if
+ // we have encountered any SIMD intrinsics.
+ if (usesSIMDTypes() && (pFieldInfo->fldSize == 0) && isSIMDClass(pFieldInfo->fldTypeHnd))
+ {
+ unsigned simdSize;
+ var_types simdBaseType = getBaseTypeAndSizeOfSIMDType(pFieldInfo->fldTypeHnd, &simdSize);
+ if (simdBaseType != TYP_UNKNOWN)
+ {
+ varType = getSIMDTypeForSize(simdSize);
+ pFieldInfo->fldType = varType;
+ pFieldInfo->fldSize = simdSize;
+ }
+ }
+#endif // FEATURE_SIMD
+
if (pFieldInfo->fldSize == 0)
{
// Non-primitive struct field. Don't promote.
@@ -1556,8 +1591,10 @@ void Compiler::lvaCanPromoteStructType(CORINFO_CLASS_HANDLE typeHnd,
#endif // _TARGET_ARM_
}
- // If we saw any GC pointer fields above then the CORINFO_FLG_CONTAINS_GC_PTR has to be set!
- noway_assert((containsGCpointers == false) || ((typeFlags & CORINFO_FLG_CONTAINS_GC_PTR) != 0));
+ // If we saw any GC pointer or by-ref fields above then CORINFO_FLG_CONTAINS_GC_PTR or
+ // CORINFO_FLG_CONTAINS_STACK_PTR has to be set!
+ noway_assert((containsGCpointers == false) ||
+ ((typeFlags & (CORINFO_FLG_CONTAINS_GC_PTR | CORINFO_FLG_CONTAINS_STACK_PTR)) != 0));
// If we have "Custom Layout" then we might have an explicit Size attribute
// Managed C++ uses this for its structs, such C++ types will not contain GC pointers.
@@ -1683,7 +1720,7 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
{
lvaStructFieldInfo* pFieldInfo = &StructPromotionInfo->fields[index];
- if (varTypeIsFloating(pFieldInfo->fldType))
+ if (varTypeIsFloating(pFieldInfo->fldType) || varTypeIsSIMD(pFieldInfo->fldType))
{
lvaTable[lclNum].lvContainsFloatingFields = 1;
// Whenever we promote a struct that contains a floating point field
@@ -1727,12 +1764,32 @@ void Compiler::lvaPromoteStructVar(unsigned lclNum, lvaStructPromotionInfo* Stru
fieldVarDsc->lvIsRegArg = true;
fieldVarDsc->lvArgReg = varDsc->lvArgReg;
fieldVarDsc->setPrefReg(varDsc->lvArgReg, this); // Set the preferred register
+#if FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)
+ if (varTypeIsSIMD(fieldVarDsc))
+ {
+ // This field is a SIMD type, and will be considered to be passed in multiple registers
+ // if the parent struct was. Note that this code relies on the fact that if there is
+ // a SIMD field of an enregisterable struct, it is the only field.
+ // We will assert that, in case future changes are made to the ABI.
+ assert(varDsc->lvFieldCnt == 1);
+ fieldVarDsc->lvOtherArgReg = varDsc->lvOtherArgReg;
+ }
+#endif // FEATURE_MULTIREG_ARGS && defined(FEATURE_SIMD)
lvaMarkRefsWeight = BB_UNITY_WEIGHT; // incRefCnts can use this compiler global variable
fieldVarDsc->incRefCnts(BB_UNITY_WEIGHT, this); // increment the ref count for prolog initialization
}
#endif
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(pFieldInfo->fldType))
+ {
+ // Set size to zero so that lvaSetStruct will appropriately set the SIMD-relevant fields.
+ fieldVarDsc->lvExactSize = 0;
+ lvaSetStruct(varNum, pFieldInfo->fldTypeHnd, false, true);
+ }
+#endif // FEATURE_SIMD
+
#ifdef DEBUG
// This temporary should not be converted to a double in stress mode,
// because we introduce assigns to it after the stress conversion
@@ -1947,14 +2004,14 @@ bool Compiler::lvaIsMultiregStruct(LclVarDsc* varDsc)
if (howToPassStruct == SPK_ByValueAsHfa)
{
- assert(type = TYP_STRUCT);
+ assert(type == TYP_STRUCT);
return true;
}
#if defined(FEATURE_UNIX_AMD64_STRUCT_PASSING) || defined(_TARGET_ARM64_)
if (howToPassStruct == SPK_ByValue)
{
- assert(type = TYP_STRUCT);
+ assert(type == TYP_STRUCT);
return true;
}
#endif
@@ -2029,7 +2086,6 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
}
else
{
- assert(varDsc->lvExactSize != 0);
#if FEATURE_SIMD
assert(!varTypeIsSIMD(varDsc) || (varDsc->lvBaseType != TYP_UNKNOWN));
#endif // FEATURE_SIMD
@@ -3082,37 +3138,6 @@ void Compiler::lvaMarkLclRefs(GenTreePtr tree)
#endif
}
-#if FANCY_ARRAY_OPT
-
- /* Special case: assignment node */
-
- if (tree->gtOper == GT_ASG)
- {
- if (tree->gtType == TYP_INT)
- {
- unsigned lclNum1;
- LclVarDsc* varDsc1;
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
-
- if (op1->gtOper != GT_LCL_VAR)
- return;
-
- lclNum1 = op1->gtLclVarCommon.gtLclNum;
- noway_assert(lclNum1 < lvaCount);
- varDsc1 = lvaTable + lclNum1;
-
- if (varDsc1->lvAssignOne)
- varDsc1->lvAssignTwo = true;
- else
- varDsc1->lvAssignOne = true;
- }
-
- return;
- }
-
-#endif
-
#ifdef _TARGET_XARCH_
/* Special case: integer shift node by a variable amount */
@@ -5750,6 +5775,7 @@ void Compiler::lvaAlignFrame()
#elif defined(_TARGET_X86_)
+#if DOUBLE_ALIGN
if (genDoubleAlign())
{
// Double Frame Alignement for x86 is handled in Compiler::lvaAssignVirtualFrameOffsetsToLocals()
@@ -5760,6 +5786,30 @@ void Compiler::lvaAlignFrame()
lvaIncrementFrameSize(sizeof(void*));
}
}
+#endif
+
+ if (STACK_ALIGN > REGSIZE_BYTES)
+ {
+ if (lvaDoneFrameLayout != FINAL_FRAME_LAYOUT)
+ {
+ // If we are not doing final layout, we don't know the exact value of compLclFrameSize
+ // and thus do not know how much we will need to add in order to be aligned.
+ // We add the maximum pad that we could ever have (which is 12).
+ lvaIncrementFrameSize(STACK_ALIGN - REGSIZE_BYTES);
+ }
+
+ // Align the stack with STACK_ALIGN value.
+ int adjustFrameSize = compLclFrameSize;
+#if defined(UNIX_X86_ABI)
+ // We need to account for the pushed callee-saved register(s), the return address, and EBP if used.
+ int adjustCount = compCalleeRegsPushed + 1 + (codeGen->isFramePointerUsed() ? 1 : 0);
+ adjustFrameSize += (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;
+#endif
+ if ((adjustFrameSize % STACK_ALIGN) != 0)
+ {
+ lvaIncrementFrameSize(STACK_ALIGN - (adjustFrameSize % STACK_ALIGN));
+ }
+ }
#else
NYI("TARGET specific lvaAlignFrame");
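
The x86 alignment code above pads compLclFrameSize so that the locals plus the pushed callee-saved registers, the return address, and (if used) EBP land on a STACK_ALIGN boundary. A small sketch of that arithmetic, assuming the UNIX x86 values REGSIZE_BYTES = 4 and STACK_ALIGN = 16 (not the JIT's code):

    #include <cassert>
    #include <cstdio>

    static const unsigned REGSIZE_BYTES = 4;
    static const unsigned STACK_ALIGN   = 16;

    static unsigned AlignedFrameSize(unsigned lclFrameSize, unsigned calleeRegsPushed, bool framePointerUsed)
    {
        // Count everything already on the stack before the local frame:
        // pushed callee-saved registers, the return address, and EBP if used.
        unsigned adjustCount     = calleeRegsPushed + 1 + (framePointerUsed ? 1 : 0);
        unsigned adjustFrameSize = lclFrameSize + (adjustCount * REGSIZE_BYTES) % STACK_ALIGN;

        // Pad the local frame so the total is a multiple of STACK_ALIGN.
        if ((adjustFrameSize % STACK_ALIGN) != 0)
        {
            lclFrameSize += STACK_ALIGN - (adjustFrameSize % STACK_ALIGN);
        }
        return lclFrameSize;
    }

    int main()
    {
        // 20 bytes of locals, 3 pushed callee-saved regs, EBP frame:
        // 3 regs + return address + EBP = 5 slots = 20 bytes, i.e. 4 bytes past a
        // 16-byte boundary, so the 20-byte local frame grows to 28 (total 48).
        unsigned padded = AlignedFrameSize(20, 3, true);
        printf("padded local frame = %u bytes\n", padded);
        assert((padded + 5 * REGSIZE_BYTES) % STACK_ALIGN == 0);
        return 0;
    }
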
diff --git a/src/jit/lir.cpp b/src/jit/lir.cpp
index 35dd1815ef..6eb8a49aca 100644
--- a/src/jit/lir.cpp
+++ b/src/jit/lir.cpp
@@ -1494,9 +1494,13 @@ bool LIR::Range::CheckLIR(Compiler* compiler, bool checkUnusedValues) const
}
else if (!def->IsValue())
{
- // Calls may contain "uses" of nodes that do not produce a value. This is an artifact of
- // the HIR and should probably be fixed, but doing so is an unknown amount of work.
- assert(node->OperGet() == GT_CALL);
+ // Stack arguments do not produce a value, but they are considered children of the call.
+ // It may be useful to remove these from being call operands, but that may also impact
+ // other code that relies on being able to reach all the operands from a call node.
+ // The GT_NOP case is because sometimes we eliminate stack argument stores as dead, but
+ // instead of removing them we replace them with a NOP.
+ assert((node->OperGet() == GT_CALL) &&
+ (def->OperIsStore() || (def->OperGet() == GT_PUTARG_STK) || (def->OperGet() == GT_NOP)));
continue;
}
diff --git a/src/jit/liveness.cpp b/src/jit/liveness.cpp
index 423d72b9b2..c6663185e4 100644
--- a/src/jit/liveness.cpp
+++ b/src/jit/liveness.cpp
@@ -19,34 +19,15 @@
*
* Helper for Compiler::fgPerBlockLocalVarLiveness().
* The goal is to compute the USE and DEF sets for a basic block.
- * However with the new improvement to the data flow analysis (DFA),
- * we do not mark x as used in x = f(x) when there are no side effects in f(x).
- * 'asgdLclVar' is set when 'tree' is part of an expression with no side-effects
- * which is assigned to asgdLclVar, ie. asgdLclVar = (... tree ...)
*/
-void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
+void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree)
{
- bool rhsUSEDEF = false;
- unsigned lclNum;
- unsigned lhsLclNum;
- LclVarDsc* varDsc;
+ assert((tree->OperIsLocal() && (tree->OperGet() != GT_PHI_ARG)) || tree->OperIsLocalAddr());
- noway_assert(tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_LCL_FLD ||
- tree->gtOper == GT_LCL_FLD_ADDR || tree->gtOper == GT_STORE_LCL_VAR ||
- tree->gtOper == GT_STORE_LCL_FLD);
-
- if (tree->gtOper == GT_LCL_VAR || tree->gtOper == GT_LCL_VAR_ADDR || tree->gtOper == GT_STORE_LCL_VAR)
- {
- lclNum = tree->gtLclNum;
- }
- else
- {
- noway_assert(tree->OperIsLocalField());
- lclNum = tree->gtLclFld.gtLclNum;
- }
+ const unsigned lclNum = tree->gtLclNum;
+ assert(lclNum < lvaCount);
- noway_assert(lclNum < lvaCount);
- varDsc = lvaTable + lclNum;
+ LclVarDsc* const varDsc = &lvaTable[lclNum];
// We should never encounter a reference to a lclVar that has a zero refCnt.
if (varDsc->lvRefCnt == 0 && (!varTypeIsPromotable(varDsc) || !varDsc->lvPromoted))
@@ -56,121 +37,80 @@ void Compiler::fgMarkUseDef(GenTreeLclVarCommon* tree, GenTree* asgdLclVar)
varDsc->lvRefCnt = 1;
}
- // NOTE: the analysis done below is neither necessary nor correct for LIR: it depends on
- // the nodes that precede `asgdLclVar` in execution order to factor into the dataflow for the
- // value being assigned to the local var, which is not necessarily the case without tree
- // order. Furthermore, LIR is always traversed in an order that reflects the dataflow for the
- // block.
- if (asgdLclVar != nullptr)
- {
- assert(!compCurBB->IsLIR());
-
- /* we have an assignment to a local var : asgdLclVar = ... tree ...
- * check for x = f(x) case */
+ const bool isDef = (tree->gtFlags & GTF_VAR_DEF) != 0;
+ const bool isUse = !isDef || ((tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) != 0);
- noway_assert(asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR);
- noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
+ if (varDsc->lvTracked)
+ {
+ assert(varDsc->lvVarIndex < lvaTrackedCount);
- lhsLclNum = asgdLclVar->gtLclVarCommon.gtLclNum;
+ // We don't treat stores to tracked locals as modifications of ByrefExposed memory;
+ // Make sure no tracked local is addr-exposed, to make sure we don't incorrectly CSE byref
+ // loads aliasing it across a store to it.
+ assert(!varDsc->lvAddrExposed);
- if ((lhsLclNum == lclNum) && ((tree->gtFlags & GTF_VAR_DEF) == 0) && (tree != asgdLclVar))
+ if (isUse && !VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
{
- /* bingo - we have an x = f(x) case */
- asgdLclVar->gtFlags |= GTF_VAR_USEDEF;
- rhsUSEDEF = true;
+ // This is an exposed use; add it to the set of uses.
+ VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
}
- }
- /* Is this a tracked variable? */
-
- if (varDsc->lvTracked)
- {
- noway_assert(varDsc->lvVarIndex < lvaTrackedCount);
-
- if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ if (isDef)
{
- // if (!(fgCurUseSet & bitMask)) printf("V%02u,T%02u def at %08p\n", lclNum, varDsc->lvVarIndex, tree);
+ // This is a def, add it to the set of defs.
VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
}
- else
+ }
+ else
+ {
+ if (varDsc->lvAddrExposed)
{
- // if (!(fgCurDefSet & bitMask))
- // {
- // printf("V%02u,T%02u use at ", lclNum, varDsc->lvVarIndex);
- // printTreeID(tree);
- // printf("\n");
- // }
-
- /* We have the following scenarios:
- * 1. "x += something" - in this case x is flagged GTF_VAR_USEASG
- * 2. "x = ... x ..." - the LHS x is flagged GTF_VAR_USEDEF,
- * the RHS x is has rhsUSEDEF = true
- * (both set by the code above)
- *
- * We should not mark an USE of x in the above cases provided the value "x" is not used
- * further up in the tree. For example "while (i++)" is required to mark i as used.
- */
+ // Reflect the effect on ByrefExposed memory
- /* make sure we don't include USEDEF variables in the USE set
- * The first test is for LSH, the second (!rhsUSEDEF) is for any var in the RHS */
-
- if ((tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
+ if (isUse)
{
- /* Not a special flag - check to see if used to assign to itself */
-
- if (rhsUSEDEF)
- {
- /* assign to itself - do not include it in the USE set */
- if (!opts.MinOpts() && !opts.compDbgCode)
- {
- return;
- }
- }
+ fgCurMemoryUse |= memoryKindSet(ByrefExposed);
}
-
- /* Fall through for the "good" cases above - add the variable to the USE set */
-
- if (!VarSetOps::IsMember(this, fgCurDefSet, varDsc->lvVarIndex))
+ if (isDef)
{
- VarSetOps::AddElemD(this, fgCurUseSet, varDsc->lvVarIndex);
- }
+ fgCurMemoryDef |= memoryKindSet(ByrefExposed);
- // For defs, also add to the (all) def set.
- if ((tree->gtFlags & GTF_VAR_DEF) != 0)
- {
- VarSetOps::AddElemD(this, fgCurDefSet, varDsc->lvVarIndex);
+ // We've found a store that modifies ByrefExposed
+ // memory but not GcHeap memory, so track their
+ // states separately.
+ byrefStatesMatchGcHeapStates = false;
}
}
- }
- else if (varTypeIsStruct(varDsc))
- {
- noway_assert(!varDsc->lvTracked);
- lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
-
- if (promotionType != PROMOTION_TYPE_NONE)
+ if (varTypeIsStruct(varDsc))
{
- VARSET_TP VARSET_INIT_NOCOPY(bitMask, VarSetOps::MakeEmpty(this));
+ lvaPromotionType promotionType = lvaGetPromotionType(varDsc);
- for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
+ if (promotionType != PROMOTION_TYPE_NONE)
{
- noway_assert(lvaTable[i].lvIsStructField);
- if (lvaTable[i].lvTracked)
+ VARSET_TP VARSET_INIT_NOCOPY(bitMask, VarSetOps::MakeEmpty(this));
+
+ for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
{
- noway_assert(lvaTable[i].lvVarIndex < lvaTrackedCount);
- VarSetOps::AddElemD(this, bitMask, lvaTable[i].lvVarIndex);
+ noway_assert(lvaTable[i].lvIsStructField);
+ if (lvaTable[i].lvTracked)
+ {
+ noway_assert(lvaTable[i].lvVarIndex < lvaTrackedCount);
+ VarSetOps::AddElemD(this, bitMask, lvaTable[i].lvVarIndex);
+ }
}
- }
- // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
- if ((tree->gtFlags & GTF_VAR_DEF) != 0 && (tree->gtFlags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0)
- {
- VarSetOps::UnionD(this, fgCurDefSet, bitMask);
- }
- else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet))
- {
- // Mark as used any struct fields that are not yet defined.
- VarSetOps::UnionD(this, fgCurUseSet, bitMask);
+ // For pure defs (i.e. not an "update" def which is also a use), add to the (all) def set.
+ if (!isUse)
+ {
+ assert(isDef);
+ VarSetOps::UnionD(this, fgCurDefSet, bitMask);
+ }
+ else if (!VarSetOps::IsSubset(this, bitMask, fgCurDefSet))
+ {
+ // Mark as used any struct fields that are not yet defined.
+ VarSetOps::UnionD(this, fgCurUseSet, bitMask);
+ }
}
}
}
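
The rewritten fgMarkUseDef above reduces the old special-casing to two booleans derived from the node flags: isDef when GTF_VAR_DEF is set, and isUse for plain reads or for defs that also read the local (GTF_VAR_USEASG/GTF_VAR_USEDEF). A sketch of that classification with the flags modeled as a plain bit mask (the values here are illustrative, not the real GTF_* constants):

    #include <cassert>
    #include <cstdio>

    enum : unsigned
    {
        GTF_VAR_DEF    = 0x1, // the node writes the local
        GTF_VAR_USEASG = 0x2, // partial/update def, e.g. "x += 1"
        GTF_VAR_USEDEF = 0x4, // def whose RHS also reads the same local
    };

    static void Classify(unsigned flags, bool* isDef, bool* isUse)
    {
        *isDef = (flags & GTF_VAR_DEF) != 0;
        // Pure reads are uses; defs are also uses when they update or read the
        // same local (USEASG/USEDEF), e.g. "x += 1" or "x = f(x)".
        *isUse = !*isDef || ((flags & (GTF_VAR_USEASG | GTF_VAR_USEDEF)) != 0);
    }

    int main()
    {
        bool isDef, isUse;

        Classify(0, &isDef, &isUse); // plain read
        assert(!isDef && isUse);

        Classify(GTF_VAR_DEF, &isDef, &isUse); // plain store
        assert(isDef && !isUse);

        Classify(GTF_VAR_DEF | GTF_VAR_USEASG, &isDef, &isUse); // "x += 1"
        assert(isDef && isUse);

        printf("all classifications match the new fgMarkUseDef logic\n");
        return 0;
    }
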
@@ -285,18 +225,15 @@ void Compiler::fgLocalVarLivenessInit()
#ifndef LEGACY_BACKEND
//------------------------------------------------------------------------
// fgPerNodeLocalVarLiveness:
-// Set fgCurHeapUse and fgCurHeapDef when the global heap is read or updated
+// Set fgCurMemoryUse and fgCurMemoryDef when memory is read or updated
// Call fgMarkUseDef for any Local variables encountered
//
// Arguments:
// tree - The current node.
-// asgdLclVar - Either nullptr or the assignement's left-hand-side GT_LCL_VAR.
-// Used as an argument to fgMarkUseDef(); only valid for HIR blocks.
//
-void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
+void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree)
{
assert(tree != nullptr);
- assert(asgdLclVar == nullptr || !compCurBB->IsLIR());
switch (tree->gtOper)
{
@@ -312,42 +249,43 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
case GT_LCL_FLD_ADDR:
case GT_STORE_LCL_VAR:
case GT_STORE_LCL_FLD:
- fgMarkUseDef(tree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(tree->AsLclVarCommon());
break;
case GT_CLS_VAR:
- // For Volatile indirection, first mutate the global heap
- // see comments in ValueNum.cpp (under case GT_CLS_VAR)
- // This models Volatile reads as def-then-use of the heap.
- // and allows for a CSE of a subsequent non-volatile read
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed.
+ // See comments in ValueNum.cpp (under case GT_CLS_VAR)
+ // This models Volatile reads as def-then-use of memory
+ // and allows for a CSE of a subsequent non-volatile read.
if ((tree->gtFlags & GTF_FLD_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
- // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a heap def, when we get to assignment.
+ // If the GT_CLS_VAR is the lhs of an assignment, we'll handle it as a GcHeap/ByrefExposed def, when we get
+ // to the assignment.
// Otherwise, we treat it as a use here.
- if (!fgCurHeapDef && (tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
+ if ((tree->gtFlags & GTF_CLS_VAR_ASG_LHS) == 0)
{
- fgCurHeapUse = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
case GT_IND:
- // For Volatile indirection, first mutate the global heap
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
// see comments in ValueNum.cpp (under case GT_CLS_VAR)
- // This models Volatile reads as def-then-use of the heap.
+ // This models Volatile reads as def-then-use of memory.
// and allows for a CSE of a subsequent non-volatile read
if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
{
// For any Volatile indirection, we must handle it as a
- // definition of the global heap
- fgCurHeapDef = true;
+ // definition of the GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
// If the GT_IND is the lhs of an assignment, we'll handle it
- // as a heap def, when we get to assignment.
+ // as a memory def, when we get to assignment.
// Otherwise, we treat it as a use here.
if ((tree->gtFlags & GTF_IND_ASG_LHS) == 0)
{
@@ -356,16 +294,13 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
GenTreePtr addrArg = tree->gtOp.gtOp1->gtEffectiveVal(/*commaOnly*/ true);
if (!addrArg->DefinesLocalAddr(this, /*width doesn't matter*/ 0, &dummyLclVarTree, &dummyIsEntire))
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
}
else
{
// Defines a local addr
assert(dummyLclVarTree != nullptr);
- fgMarkUseDef(dummyLclVarTree->AsLclVarCommon(), asgdLclVar);
+ fgMarkUseDef(dummyLclVarTree->AsLclVarCommon());
}
}
break;
@@ -376,25 +311,22 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
unreached();
break;
- // We'll assume these are use-then-defs of the heap.
+ // We'll assume these are use-then-defs of memory.
case GT_LOCKADD:
case GT_XADD:
case GT_XCHG:
case GT_CMPXCHG:
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
break;
case GT_MEMORYBARRIER:
- // Simliar to any Volatile indirection, we must handle this as a definition of the global heap
- fgCurHeapDef = true;
+ // Similar to any Volatile indirection, we must handle this as a definition of GcHeap/ByrefExposed
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
break;
- // For now, all calls read/write the heap, the latter in its entirety. Might tighten this case later.
+ // For now, all calls read/write GcHeap/ByrefExposed, with the writes covering them in their entirety. Might tighten this case later.
case GT_CALL:
{
GenTreeCall* call = tree->AsCall();
@@ -410,12 +342,9 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
}
if (modHeap)
{
- if (!fgCurHeapDef)
- {
- fgCurHeapUse = true;
- }
- fgCurHeapDef = true;
- fgCurHeapHavoc = true;
+ fgCurMemoryUse |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
+ fgCurMemoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
}
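
The fgCurMemoryUse/Def/Havoc updates above treat GcHeap and ByrefExposed as members of a small MemoryKind bit set. The real MemoryKind enum, memoryKindSet(), and the empty/full set constants are defined in compiler.h and are not part of this hunk, so the sketch below only assumes they behave like this toy model:

    #include <cassert>
    #include <cstdio>

    // Toy model of the MemoryKind sets; names mirror the diff, values are assumed.
    enum MemoryKind
    {
        ByrefExposed = 0, // all memory reachable through a byref
        GcHeap       = 1, // the GC heap proper (a subset of ByrefExposed)
        MemoryKindCount
    };

    typedef unsigned MemoryKindSet;

    static MemoryKindSet memoryKindSet(MemoryKind kind)
    {
        return 1u << kind;
    }

    static MemoryKindSet memoryKindSet(MemoryKind kind1, MemoryKind kind2)
    {
        return memoryKindSet(kind1) | memoryKindSet(kind2);
    }

    static const MemoryKindSet emptyMemoryKindSet = 0;
    static const MemoryKindSet fullMemoryKindSet  = (1u << MemoryKindCount) - 1;

    int main()
    {
        // A volatile indirection or call defines both kinds, as in the hunk above.
        MemoryKindSet def = emptyMemoryKindSet;
        def |= memoryKindSet(GcHeap, ByrefExposed);
        assert(def == fullMemoryKindSet);

        // A store to an address-exposed local touches ByrefExposed only.
        MemoryKindSet byrefOnly = memoryKindSet(ByrefExposed);
        printf("def=0x%x byrefOnly=0x%x\n", def, byrefOnly);
        return 0;
    }
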
@@ -451,35 +380,32 @@ void Compiler::fgPerNodeLocalVarLiveness(GenTree* tree, GenTree* asgdLclVar)
default:
- // Determine whether it defines a heap location.
+ // Determine what memory locations it defines.
if (tree->OperIsAssignment() || tree->OperIsBlkOp())
{
GenTreeLclVarCommon* dummyLclVarTree = nullptr;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ if (tree->DefinesLocal(this, &dummyLclVarTree))
+ {
+ if (lvaVarAddrExposed(dummyLclVarTree->gtLclNum))
+ {
+ fgCurMemoryDef |= memoryKindSet(ByrefExposed);
+
+ // We've found a store that modifies ByrefExposed
+ // memory but not GcHeap memory, so track their
+ // states separately.
+ byrefStatesMatchGcHeapStates = false;
+ }
+ }
+ else
{
- // If it doesn't define a local, then it might update the heap.
- fgCurHeapDef = true;
+ // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
+ fgCurMemoryDef |= memoryKindSet(GcHeap, ByrefExposed);
}
}
break;
}
}
-void Compiler::fgPerStatementLocalVarLiveness(GenTree* startNode, GenTree* asgdLclVar)
-{
- // The startNode must be the 1st node of the statement.
- assert(startNode == compCurStmt->gtStmt.gtStmtList);
-
- // The asgdLclVar node must be either nullptr or a GT_LCL_VAR or GT_STORE_LCL_VAR
- assert((asgdLclVar == nullptr) || (asgdLclVar->gtOper == GT_LCL_VAR || asgdLclVar->gtOper == GT_STORE_LCL_VAR));
-
- // We always walk every node in statement list
- for (GenTreePtr node = startNode; node != nullptr; node = node->gtNext)
- {
- fgPerNodeLocalVarLiveness(node, asgdLclVar);
- }
-}
-
#endif // !LEGACY_BACKEND
/*****************************************************************************/
@@ -524,10 +450,10 @@ void Compiler::fgPerBlockLocalVarLiveness()
VarSetOps::Assign(this, block->bbVarDef, liveAll);
VarSetOps::Assign(this, block->bbLiveIn, liveAll);
VarSetOps::Assign(this, block->bbLiveOut, liveAll);
- block->bbHeapUse = true;
- block->bbHeapDef = true;
- block->bbHeapLiveIn = true;
- block->bbHeapLiveOut = true;
+ block->bbMemoryUse = fullMemoryKindSet;
+ block->bbMemoryDef = fullMemoryKindSet;
+ block->bbMemoryLiveIn = fullMemoryKindSet;
+ block->bbMemoryLiveOut = fullMemoryKindSet;
switch (block->bbJumpKind)
{
@@ -540,6 +466,11 @@ void Compiler::fgPerBlockLocalVarLiveness()
break;
}
}
+
+ // In minopts, we don't explicitly build SSA or value-number; GcHeap and
+ // ByrefExposed implicitly (conservatively) change state at each instr.
+ byrefStatesMatchGcHeapStates = true;
+
return;
}
@@ -549,77 +480,34 @@ void Compiler::fgPerBlockLocalVarLiveness()
VarSetOps::AssignNoCopy(this, fgCurUseSet, VarSetOps::MakeEmpty(this));
VarSetOps::AssignNoCopy(this, fgCurDefSet, VarSetOps::MakeEmpty(this));
+ // GC Heap and ByrefExposed can share states unless we see a def of byref-exposed
+ // memory that is not a GC Heap def.
+ byrefStatesMatchGcHeapStates = true;
+
for (block = fgFirstBB; block; block = block->bbNext)
{
- GenTreePtr stmt;
- GenTreePtr tree;
- GenTreePtr asgdLclVar;
-
VarSetOps::ClearD(this, fgCurUseSet);
VarSetOps::ClearD(this, fgCurDefSet);
- fgCurHeapUse = false;
- fgCurHeapDef = false;
- fgCurHeapHavoc = false;
+ fgCurMemoryUse = emptyMemoryKindSet;
+ fgCurMemoryDef = emptyMemoryKindSet;
+ fgCurMemoryHavoc = emptyMemoryKindSet;
compCurBB = block;
-
if (!block->IsLIR())
{
- for (stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNext)
+ for (GenTreeStmt* stmt = block->FirstNonPhiDef(); stmt; stmt = stmt->gtNextStmt)
{
- noway_assert(stmt->gtOper == GT_STMT);
-
compCurStmt = stmt;
- asgdLclVar = nullptr;
- tree = stmt->gtStmt.gtStmtExpr;
- noway_assert(tree);
-
- // The following code checks if we have an assignment expression
- // which may become a GTF_VAR_USEDEF - x=f(x).
- // consider if LHS is local var - ignore if RHS contains SIDE_EFFECTS
-
- if ((tree->gtOper == GT_ASG && tree->gtOp.gtOp1->gtOper == GT_LCL_VAR) ||
- tree->gtOper == GT_STORE_LCL_VAR)
- {
- noway_assert(tree->gtOp.gtOp1);
- GenTreePtr rhsNode;
- if (tree->gtOper == GT_ASG)
- {
- noway_assert(tree->gtOp.gtOp2);
- asgdLclVar = tree->gtOp.gtOp1;
- rhsNode = tree->gtOp.gtOp2;
- }
- else
- {
- asgdLclVar = tree;
- rhsNode = tree->gtOp.gtOp1;
- }
-
- // If this is an assignment to local var with no SIDE EFFECTS,
- // set asgdLclVar so that genMarkUseDef will flag potential
- // x=f(x) expressions as GTF_VAR_USEDEF.
- // Reset the flag before recomputing it - it may have been set before,
- // but subsequent optimizations could have removed the rhs reference.
- asgdLclVar->gtFlags &= ~GTF_VAR_USEDEF;
- if ((rhsNode->gtFlags & GTF_SIDE_EFFECT) == 0)
- {
- noway_assert(asgdLclVar->gtFlags & GTF_VAR_DEF);
- }
- else
- {
- asgdLclVar = nullptr;
- }
- }
-
#ifdef LEGACY_BACKEND
- tree = fgLegacyPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, NULL, asgdLclVar);
-
- // We must have walked to the end of this statement.
- noway_assert(!tree);
+ GenTree* tree = fgLegacyPerStatementLocalVarLiveness(stmt->gtStmtList, nullptr);
+ assert(tree == nullptr);
#else // !LEGACY_BACKEND
- fgPerStatementLocalVarLiveness(stmt->gtStmt.gtStmtList, asgdLclVar);
+ for (GenTree* node = stmt->gtStmtList; node != nullptr; node = node->gtNext)
+ {
+ fgPerNodeLocalVarLiveness(node);
+ }
#endif // !LEGACY_BACKEND
}
}
@@ -628,13 +516,9 @@ void Compiler::fgPerBlockLocalVarLiveness()
#ifdef LEGACY_BACKEND
unreached();
#else // !LEGACY_BACKEND
- // NOTE: the `asgdLclVar` analysis done above is not correct for LIR: it depends
- // on all of the nodes that precede `asgdLclVar` in execution order to factor into the
- // dataflow for the value being assigned to the local var, which is not necessarily the
- // case without tree order. As a result, we simply pass `nullptr` for `asgdLclVar`.
for (GenTree* node : LIR::AsRange(block).NonPhiNodes())
{
- fgPerNodeLocalVarLiveness(node, nullptr);
+ fgPerNodeLocalVarLiveness(node);
}
#endif // !LEGACY_BACKEND
}
@@ -667,19 +551,25 @@ void Compiler::fgPerBlockLocalVarLiveness()
printf("BB%02u", block->bbNum);
printf(" USE(%d)=", VarSetOps::Count(this, fgCurUseSet));
lvaDispVarSet(fgCurUseSet, allVars);
- if (fgCurHeapUse)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
+ if ((fgCurMemoryUse & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
}
printf("\n DEF(%d)=", VarSetOps::Count(this, fgCurDefSet));
lvaDispVarSet(fgCurDefSet, allVars);
- if (fgCurHeapDef)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
- }
- if (fgCurHeapHavoc)
- {
- printf("*");
+ if ((fgCurMemoryDef & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
+ if ((fgCurMemoryHavoc & memoryKindSet(memoryKind)) != 0)
+ {
+ printf("*");
+ }
}
printf("\n\n");
}
@@ -687,15 +577,23 @@ void Compiler::fgPerBlockLocalVarLiveness()
VarSetOps::Assign(this, block->bbVarUse, fgCurUseSet);
VarSetOps::Assign(this, block->bbVarDef, fgCurDefSet);
- block->bbHeapUse = fgCurHeapUse;
- block->bbHeapDef = fgCurHeapDef;
- block->bbHeapHavoc = fgCurHeapHavoc;
+ block->bbMemoryUse = fgCurMemoryUse;
+ block->bbMemoryDef = fgCurMemoryDef;
+ block->bbMemoryHavoc = fgCurMemoryHavoc;
/* also initialize the IN set, just in case we will do multiple DFAs */
VarSetOps::AssignNoCopy(this, block->bbLiveIn, VarSetOps::MakeEmpty(this));
- block->bbHeapLiveIn = false;
+ block->bbMemoryLiveIn = emptyMemoryKindSet;
}
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("** Memory liveness computed, GcHeap states and ByrefExposed states %s\n",
+ (byrefStatesMatchGcHeapStates ? "match" : "diverge"));
+ }
+#endif // DEBUG
}
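
The bbMemoryUse/bbMemoryDef/bbMemoryHavoc fields written above are small bit sets indexed by MemoryKind, manipulated through the memoryKindSet and allMemoryKinds helpers seen in the diff. As a minimal standalone sketch of that representation (the enum values and helper below are illustrative approximations, not the JIT's actual declarations):

    // Illustrative sketch only: mirrors the per-kind bit-set usage above with
    // approximate definitions; the JIT's own declarations may differ.
    #include <cstdio>

    enum MemoryKind
    {
        ByrefExposed = 0,
        GcHeap       = 1,
        MemoryKindCount
    };

    typedef unsigned MemoryKindSet;

    static MemoryKindSet memoryKindSet(MemoryKind kind)
    {
        return 1u << kind;
    }

    int main()
    {
        MemoryKindSet memoryUse = memoryKindSet(GcHeap); // block reads the GC heap
        MemoryKindSet memoryDef = memoryKindSet(GcHeap) | memoryKindSet(ByrefExposed); // block writes both kinds

        for (int kind = 0; kind < MemoryKindCount; kind++)
        {
            if ((memoryUse & memoryKindSet((MemoryKind)kind)) != 0)
            {
                std::printf("memory kind %d is used\n", kind);
            }
            if ((memoryDef & memoryKindSet((MemoryKind)kind)) != 0)
            {
                std::printf("memory kind %d is defined\n", kind);
            }
        }
        return 0;
    }
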
// Helper functions to mark variables live over their entire scope
@@ -1226,181 +1124,218 @@ VARSET_VALRET_TP Compiler::fgGetHandlerLiveVars(BasicBlock* block)
return liveVars;
}
-/*****************************************************************************
- *
- * This is the classic algorithm for Live Variable Analysis.
- * If updateInternalOnly==true, only update BBF_INTERNAL blocks.
- */
-
-void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
+class LiveVarAnalysis
{
- BasicBlock* block;
- bool change;
-#ifdef DEBUG
- VARSET_TP VARSET_INIT_NOCOPY(extraLiveOutFromFinally, VarSetOps::MakeEmpty(this));
-#endif // DEBUG
- bool keepAliveThis = lvaKeepAliveAndReportThis() && lvaTable[info.compThisArg].lvTracked;
+ Compiler* m_compiler;
- /* Live Variable Analysis - Backward dataflow */
+ bool m_hasPossibleBackEdge;
- bool hasPossibleBackEdge = false;
+ unsigned m_memoryLiveIn;
+ unsigned m_memoryLiveOut;
+ VARSET_TP m_liveIn;
+ VARSET_TP m_liveOut;
- do
+ LiveVarAnalysis(Compiler* compiler)
+ : m_compiler(compiler)
+ , m_hasPossibleBackEdge(false)
+ , m_memoryLiveIn(emptyMemoryKindSet)
+ , m_memoryLiveOut(emptyMemoryKindSet)
+ , m_liveIn(VarSetOps::MakeEmpty(compiler))
+ , m_liveOut(VarSetOps::MakeEmpty(compiler))
{
- change = false;
-
- /* Visit all blocks and compute new data flow values */
-
- VARSET_TP VARSET_INIT_NOCOPY(liveIn, VarSetOps::MakeEmpty(this));
- VARSET_TP VARSET_INIT_NOCOPY(liveOut, VarSetOps::MakeEmpty(this));
-
- bool heapLiveIn = false;
- bool heapLiveOut = false;
+ }
- for (block = fgLastBB; block; block = block->bbPrev)
+ bool PerBlockAnalysis(BasicBlock* block, bool updateInternalOnly, bool keepAliveThis)
+ {
+ /* Compute the 'liveOut' set */
+ VarSetOps::ClearD(m_compiler, m_liveOut);
+ m_memoryLiveOut = emptyMemoryKindSet;
+ if (block->endsWithJmpMethod(m_compiler))
{
- // sometimes block numbers are not monotonically increasing which
- // would cause us not to identify backedges
- if (block->bbNext && block->bbNext->bbNum <= block->bbNum)
+ // A JMP uses all the arguments, so mark them all
+ // as live at the JMP instruction
+ //
+ const LclVarDsc* varDscEndParams = m_compiler->lvaTable + m_compiler->info.compArgsCount;
+ for (LclVarDsc* varDsc = m_compiler->lvaTable; varDsc < varDscEndParams; varDsc++)
{
- hasPossibleBackEdge = true;
+ noway_assert(!varDsc->lvPromoted);
+ if (varDsc->lvTracked)
+ {
+ VarSetOps::AddElemD(m_compiler, m_liveOut, varDsc->lvVarIndex);
+ }
}
+ }
- if (updateInternalOnly)
+ // Additionally, union in all the live-in tracked vars of successors.
+ AllSuccessorIter succsEnd = block->GetAllSuccs(m_compiler).end();
+ for (AllSuccessorIter succs = block->GetAllSuccs(m_compiler).begin(); succs != succsEnd; ++succs)
+ {
+ BasicBlock* succ = (*succs);
+ VarSetOps::UnionD(m_compiler, m_liveOut, succ->bbLiveIn);
+ m_memoryLiveOut |= (*succs)->bbMemoryLiveIn;
+ if (succ->bbNum <= block->bbNum)
{
- /* Only update BBF_INTERNAL blocks as they may be
- syntactically out of sequence. */
+ m_hasPossibleBackEdge = true;
+ }
+ }
- noway_assert(opts.compDbgCode && (info.compVarScopesCount > 0));
+ /* For lvaKeepAliveAndReportThis methods, "this" has to be kept alive everywhere
+ Note that a function may end in a throw on an infinite loop (as opposed to a return).
+ "this" has to be alive everywhere even in such methods. */
- if (!(block->bbFlags & BBF_INTERNAL))
- {
- continue;
- }
- }
+ if (keepAliveThis)
+ {
+ VarSetOps::AddElemD(m_compiler, m_liveOut, m_compiler->lvaTable[m_compiler->info.compThisArg].lvVarIndex);
+ }
- /* Compute the 'liveOut' set */
+ /* Compute the 'm_liveIn' set */
+ VarSetOps::Assign(m_compiler, m_liveIn, m_liveOut);
+ VarSetOps::DiffD(m_compiler, m_liveIn, block->bbVarDef);
+ VarSetOps::UnionD(m_compiler, m_liveIn, block->bbVarUse);
- VarSetOps::ClearD(this, liveOut);
- heapLiveOut = false;
- if (block->endsWithJmpMethod(this))
+ // Even if block->bbMemoryDef is set, we must assume that it doesn't kill memory liveness from m_memoryLiveOut,
+ // since (without proof otherwise) the use and def may touch different memory at run-time.
+ m_memoryLiveIn = m_memoryLiveOut | block->bbMemoryUse;
+
+ /* Can exceptions from this block be handled (in this function)? */
+
+ if (m_compiler->ehBlockHasExnFlowDsc(block))
+ {
+ VARSET_TP VARSET_INIT_NOCOPY(liveVars, m_compiler->fgGetHandlerLiveVars(block));
+
+ VarSetOps::UnionD(m_compiler, m_liveIn, liveVars);
+ VarSetOps::UnionD(m_compiler, m_liveOut, liveVars);
+ }
+
+ /* Has there been any change in either live set? */
+
+ bool liveInChanged = !VarSetOps::Equal(m_compiler, block->bbLiveIn, m_liveIn);
+ if (liveInChanged || !VarSetOps::Equal(m_compiler, block->bbLiveOut, m_liveOut))
+ {
+ if (updateInternalOnly)
{
- // A JMP uses all the arguments, so mark them all
- // as live at the JMP instruction
- //
- const LclVarDsc* varDscEndParams = lvaTable + info.compArgsCount;
- for (LclVarDsc* varDsc = lvaTable; varDsc < varDscEndParams; varDsc++)
+ // Only "extend" liveness over BBF_INTERNAL blocks
+
+ noway_assert(block->bbFlags & BBF_INTERNAL);
+
+ liveInChanged =
+ !VarSetOps::Equal(m_compiler, VarSetOps::Intersection(m_compiler, block->bbLiveIn, m_liveIn),
+ m_liveIn);
+ if (liveInChanged ||
+ !VarSetOps::Equal(m_compiler, VarSetOps::Intersection(m_compiler, block->bbLiveOut, m_liveOut),
+ m_liveOut))
{
- noway_assert(!varDsc->lvPromoted);
- if (varDsc->lvTracked)
+#ifdef DEBUG
+ if (m_compiler->verbose)
{
- VarSetOps::AddElemD(this, liveOut, varDsc->lvVarIndex);
+ printf("Scope info: block BB%02u LiveIn+ ", block->bbNum);
+ dumpConvertedVarSet(m_compiler, VarSetOps::Diff(m_compiler, m_liveIn, block->bbLiveIn));
+ printf(", LiveOut+ ");
+ dumpConvertedVarSet(m_compiler, VarSetOps::Diff(m_compiler, m_liveOut, block->bbLiveOut));
+ printf("\n");
}
- }
- }
+#endif // DEBUG
- // Additionally, union in all the live-in tracked vars of successors.
- AllSuccessorIter succsEnd = block->GetAllSuccs(this).end();
- for (AllSuccessorIter succs = block->GetAllSuccs(this).begin(); succs != succsEnd; ++succs)
- {
- BasicBlock* succ = (*succs);
- VarSetOps::UnionD(this, liveOut, succ->bbLiveIn);
- heapLiveOut = heapLiveOut || (*succs)->bbHeapLiveIn;
- if (succ->bbNum <= block->bbNum)
- {
- hasPossibleBackEdge = true;
+ VarSetOps::UnionD(m_compiler, block->bbLiveIn, m_liveIn);
+ VarSetOps::UnionD(m_compiler, block->bbLiveOut, m_liveOut);
}
}
-
- /* For lvaKeepAliveAndReportThis methods, "this" has to be kept alive everywhere
- Note that a function may end in a throw on an infinite loop (as opposed to a return).
- "this" has to be alive everywhere even in such methods. */
-
- if (keepAliveThis)
+ else
{
- VarSetOps::AddElemD(this, liveOut, lvaTable[info.compThisArg].lvVarIndex);
+ VarSetOps::Assign(m_compiler, block->bbLiveIn, m_liveIn);
+ VarSetOps::Assign(m_compiler, block->bbLiveOut, m_liveOut);
}
+ }
- /* Compute the 'liveIn' set */
+ const bool memoryLiveInChanged = (block->bbMemoryLiveIn != m_memoryLiveIn);
+ if (memoryLiveInChanged || (block->bbMemoryLiveOut != m_memoryLiveOut))
+ {
+ block->bbMemoryLiveIn = m_memoryLiveIn;
+ block->bbMemoryLiveOut = m_memoryLiveOut;
+ }
- VarSetOps::Assign(this, liveIn, liveOut);
- VarSetOps::DiffD(this, liveIn, block->bbVarDef);
- VarSetOps::UnionD(this, liveIn, block->bbVarUse);
+ return liveInChanged || memoryLiveInChanged;
+ }
- heapLiveIn = (heapLiveOut && !block->bbHeapDef) || block->bbHeapUse;
+ void Run(bool updateInternalOnly)
+ {
+ const bool keepAliveThis =
+ m_compiler->lvaKeepAliveAndReportThis() && m_compiler->lvaTable[m_compiler->info.compThisArg].lvTracked;
- /* Can exceptions from this block be handled (in this function)? */
+ /* Live Variable Analysis - Backward dataflow */
+ bool changed;
+ do
+ {
+ changed = false;
- if (ehBlockHasExnFlowDsc(block))
- {
- VARSET_TP VARSET_INIT_NOCOPY(liveVars, fgGetHandlerLiveVars(block));
+ /* Visit all blocks and compute new data flow values */
- VarSetOps::UnionD(this, liveIn, liveVars);
- VarSetOps::UnionD(this, liveOut, liveVars);
- }
+ VarSetOps::ClearD(m_compiler, m_liveIn);
+ VarSetOps::ClearD(m_compiler, m_liveOut);
- /* Has there been any change in either live set? */
+ m_memoryLiveIn = emptyMemoryKindSet;
+ m_memoryLiveOut = emptyMemoryKindSet;
- if (!VarSetOps::Equal(this, block->bbLiveIn, liveIn) || !VarSetOps::Equal(this, block->bbLiveOut, liveOut))
+ for (BasicBlock* block = m_compiler->fgLastBB; block; block = block->bbPrev)
{
+ // Sometimes block numbers are not monotonically increasing, which
+ // would cause us not to identify backedges.
+ if (block->bbNext && block->bbNext->bbNum <= block->bbNum)
+ {
+ m_hasPossibleBackEdge = true;
+ }
+
if (updateInternalOnly)
{
- // Only "extend" liveness over BBF_INTERNAL blocks
+ /* Only update BBF_INTERNAL blocks as they may be
+ syntactically out of sequence. */
- noway_assert(block->bbFlags & BBF_INTERNAL);
+ noway_assert(m_compiler->opts.compDbgCode && (m_compiler->info.compVarScopesCount > 0));
- if (!VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveIn, liveIn), liveIn) ||
- !VarSetOps::Equal(this, VarSetOps::Intersection(this, block->bbLiveOut, liveOut), liveOut))
+ if (!(block->bbFlags & BBF_INTERNAL))
{
-#ifdef DEBUG
- if (verbose)
- {
- printf("Scope info: block BB%02u LiveIn+ ", block->bbNum);
- dumpConvertedVarSet(this, VarSetOps::Diff(this, liveIn, block->bbLiveIn));
- printf(", LiveOut+ ");
- dumpConvertedVarSet(this, VarSetOps::Diff(this, liveOut, block->bbLiveOut));
- printf("\n");
- }
-#endif // DEBUG
-
- VarSetOps::UnionD(this, block->bbLiveIn, liveIn);
- VarSetOps::UnionD(this, block->bbLiveOut, liveOut);
- change = true;
+ continue;
}
}
- else
+
+ if (PerBlockAnalysis(block, updateInternalOnly, keepAliveThis))
{
- VarSetOps::Assign(this, block->bbLiveIn, liveIn);
- VarSetOps::Assign(this, block->bbLiveOut, liveOut);
- change = true;
+ changed = true;
}
}
-
- if ((block->bbHeapLiveIn == 1) != heapLiveIn || (block->bbHeapLiveOut == 1) != heapLiveOut)
+ // If there is no way we could have processed a block without seeing all of its predecessors,
+ // then there is no need to iterate.
+ if (!m_hasPossibleBackEdge)
{
- block->bbHeapLiveIn = heapLiveIn;
- block->bbHeapLiveOut = heapLiveOut;
- change = true;
+ break;
}
- }
- // if there is no way we could have processed a block without seeing all of its predecessors
- // then there is no need to iterate
- if (!hasPossibleBackEdge)
- {
- break;
- }
- } while (change);
+ } while (changed);
+ }
-//-------------------------------------------------------------------------
+public:
+ static void Run(Compiler* compiler, bool updateInternalOnly)
+ {
+ LiveVarAnalysis analysis(compiler);
+ analysis.Run(updateInternalOnly);
+ }
+};
-#ifdef DEBUG
+/*****************************************************************************
+ *
+ * This is the classic algorithm for Live Variable Analysis.
+ * If updateInternalOnly==true, only update BBF_INTERNAL blocks.
+ */
+
+void Compiler::fgLiveVarAnalysis(bool updateInternalOnly)
+{
+ LiveVarAnalysis::Run(this, updateInternalOnly);
+#ifdef DEBUG
if (verbose && !updateInternalOnly)
{
printf("\nBB liveness after fgLiveVarAnalysis():\n\n");
fgDispBBLiveness();
}
-
#endif // DEBUG
}
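
For readers not familiar with the dataflow, the per-block transfer function implemented by PerBlockAnalysis is liveIn = use + (liveOut - def) for tracked variables, while memory liveness deliberately has no kill term (memoryLiveIn = memoryLiveOut | memoryUse). A self-contained sketch of the backward fixed-point loop, using std::bitset in place of VARSET_TP and a plain vector in place of the flowgraph; the names and types here are illustrative only:

    // Illustrative sketch only: plain std::bitset stands in for VARSET_TP and the
    // block list is a simple vector, not the JIT's BasicBlock flowgraph.
    #include <bitset>
    #include <vector>

    constexpr size_t kMaxVars = 64;
    using VarSet = std::bitset<kMaxVars>;

    struct Block
    {
        VarSet use, def, liveIn, liveOut;
        std::vector<int> succs; // indices of successor blocks
    };

    void RunLiveness(std::vector<Block>& blocks)
    {
        bool changed;
        do
        {
            changed = false;
            // Walk the blocks backwards, like the fgLastBB..bbPrev loop above.
            for (int i = (int)blocks.size() - 1; i >= 0; i--)
            {
                Block& b = blocks[i];
                VarSet liveOut;
                for (int s : b.succs)
                {
                    liveOut |= blocks[s].liveIn; // union of successors' live-in sets
                }
                VarSet liveIn = b.use | (liveOut & ~b.def); // use + (liveOut - def)
                if (liveIn != b.liveIn || liveOut != b.liveOut)
                {
                    b.liveIn  = liveIn;
                    b.liveOut = liveOut;
                    changed   = true;
                }
            }
        } while (changed); // iterate to a fixed point when back edges exist
    }
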
@@ -3090,15 +3025,21 @@ void Compiler::fgDispBBLiveness(BasicBlock* block)
printf("BB%02u", block->bbNum);
printf(" IN (%d)=", VarSetOps::Count(this, block->bbLiveIn));
lvaDispVarSet(block->bbLiveIn, allVars);
- if (block->bbHeapLiveIn)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
+ if ((block->bbMemoryLiveIn & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
}
printf("\n OUT(%d)=", VarSetOps::Count(this, block->bbLiveOut));
lvaDispVarSet(block->bbLiveOut, allVars);
- if (block->bbHeapLiveOut)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- printf(" + HEAP");
+ if ((block->bbMemoryLiveOut & memoryKindSet(memoryKind)) != 0)
+ {
+ printf(" + %s", memoryKindNames[memoryKind]);
+ }
}
printf("\n\n");
}
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index a6e50b304c..0316a34a21 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -167,8 +167,13 @@ GenTree* Lowering::LowerNode(GenTree* node)
case GT_STORE_BLK:
case GT_STORE_OBJ:
case GT_STORE_DYN_BLK:
- LowerBlockStore(node->AsBlk());
- break;
+ {
+ // TODO-Cleanup: Consider moving this code to LowerBlockStore, which is currently
+ // called from TreeNodeInfoInitBlockStore, and calling that method here.
+ GenTreeBlk* blkNode = node->AsBlk();
+ TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
+ }
+ break;
#ifdef FEATURE_SIMD
case GT_SIMD:
@@ -236,20 +241,14 @@ GenTree* Lowering::LowerNode(GenTree* node)
unsigned varNum = node->AsLclVarCommon()->GetLclNum();
LclVarDsc* varDsc = &comp->lvaTable[varNum];
-#if defined(_TARGET_64BIT_)
- assert(varDsc->lvSize() == 16);
- node->gtType = TYP_SIMD16;
-#else // !_TARGET_64BIT_
- if (varDsc->lvSize() == 16)
+ if (comp->lvaMapSimd12ToSimd16(varDsc))
{
+ JITDUMP("Mapping TYP_SIMD12 lclvar node to TYP_SIMD16:\n");
+ DISPNODE(node);
+ JITDUMP("============");
+
node->gtType = TYP_SIMD16;
}
- else
- {
- // The following assert is guaranteed by lvSize().
- assert(varDsc->lvIsParam);
- }
-#endif // !_TARGET_64BIT_
}
#endif // FEATURE_SIMD
__fallthrough;
@@ -549,7 +548,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
// If the number of possible destinations is small enough, we proceed to expand the switch
// into a series of conditional branches, otherwise we follow the jump table based switch
// transformation.
- else if (jumpCnt < minSwitchTabJumpCnt)
+ else if ((jumpCnt < minSwitchTabJumpCnt) || comp->compStressCompile(Compiler::STRESS_SWITCH_CMP_BR_EXPANSION, 50))
{
// Lower the switch into a series of compare and branch IR trees.
//
@@ -639,7 +638,7 @@ GenTree* Lowering::LowerSwitch(GenTree* node)
GenTreePtr gtCaseBranch = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, gtCaseCond);
LIR::Range caseRange = LIR::SeqTree(comp, gtCaseBranch);
- currentBBRange->InsertAtEnd(std::move(condRange));
+ currentBBRange->InsertAtEnd(std::move(caseRange));
}
}
@@ -757,16 +756,6 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
GenTreePtr putArg = nullptr;
bool updateArgTable = true;
-#if !defined(_TARGET_64BIT_)
- if (varTypeIsLong(type))
- {
- // For TYP_LONG, we leave the GT_LONG as the arg, and put the putArg below it.
- // Therefore, we don't update the arg table entry.
- updateArgTable = false;
- type = TYP_INT;
- }
-#endif // !defined(_TARGET_64BIT_)
-
bool isOnStack = true;
#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
if (varTypeIsStruct(type))
@@ -954,6 +943,11 @@ GenTreePtr Lowering::NewPutArg(GenTreeCall* call, GenTreePtr arg, fgArgTabEntryP
info->slotNum PUT_STRUCT_ARG_STK_ONLY_ARG(info->numSlots) DEBUGARG(call));
#endif
+#if defined(UNIX_X86_ABI)
+ assert((info->padStkAlign > 0 && info->numSlots > 0) || (info->padStkAlign == 0));
+ putArg->AsPutArgStk()->setArgPadding(info->padStkAlign);
+#endif
+
#ifdef FEATURE_PUT_STRUCT_ARG_STK
// If the ArgTabEntry indicates that this arg is a struct
// get and store the number of slots that are references.
@@ -1084,25 +1078,22 @@ void Lowering::LowerArg(GenTreeCall* call, GenTreePtr* ppArg)
NYI("Lowering of long register argument");
}
- // For longs, we will create two PUTARG_STKs below the GT_LONG. The hi argument needs to
- // be pushed first, so the hi PUTARG_STK will precede the lo PUTARG_STK in execution order.
+ // For longs, we will replace the GT_LONG with a GT_FIELD_LIST, and put that under a PUTARG_STK.
+ // Although the hi argument needs to be pushed first, that will be handled by the general case,
+ // in which the fields will be reversed.
noway_assert(arg->OperGet() == GT_LONG);
- GenTreePtr argLo = arg->gtGetOp1();
- GenTreePtr argHi = arg->gtGetOp2();
-
- GenTreePtr putArgLo = NewPutArg(call, argLo, info, type);
- GenTreePtr putArgHi = NewPutArg(call, argHi, info, type);
-
- arg->gtOp.gtOp1 = putArgLo;
- arg->gtOp.gtOp2 = putArgHi;
-
- BlockRange().InsertBefore(arg, putArgHi, putArgLo);
-
- // The execution order now looks like this:
- // argLoPrev <-> argLoFirst ... argLo <-> argHiFirst ... argHi <-> putArgHi <-> putArgLo <-> arg(GT_LONG)
-
- assert((arg->gtFlags & GTF_REVERSE_OPS) == 0);
- arg->gtFlags |= GTF_REVERSE_OPS; // We consume the high arg (op2) first.
+ assert(info->numSlots == 2);
+ GenTreePtr argLo = arg->gtGetOp1();
+ GenTreePtr argHi = arg->gtGetOp2();
+ GenTreeFieldList* fieldList = new (comp, GT_FIELD_LIST) GenTreeFieldList(argLo, 0, TYP_INT, nullptr);
+ // Only the first fieldList node (GTF_FIELD_LIST_HEAD) is in the instruction sequence.
+ (void)new (comp, GT_FIELD_LIST) GenTreeFieldList(argHi, 4, TYP_INT, fieldList);
+ putArg = NewPutArg(call, fieldList, info, TYP_VOID);
+
+ // We can't call ReplaceArgWithPutArgOrCopy here because it presumes that we are keeping the original arg.
+ BlockRange().InsertBefore(arg, fieldList, putArg);
+ BlockRange().Remove(arg);
+ *ppArg = putArg;
}
else
#endif // !defined(_TARGET_64BIT_)
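
For reference, the GT_FIELD_LIST built above places the low half of the long at offset 0 and the high half at offset 4, the little-endian layout of a 64-bit value across two 4-byte stack slots. A standalone sketch of that layout, independent of the JIT types:

    // Illustrative sketch: shows the lo/hi split the GT_FIELD_LIST above describes.
    #include <cstdint>
    #include <cstring>
    #include <cstdio>

    int main()
    {
        uint64_t arg = 0x1122334455667788ULL;
        uint32_t lo  = (uint32_t)(arg & 0xFFFFFFFF); // field at offset 0, TYP_INT
        uint32_t hi  = (uint32_t)(arg >> 32);        // field at offset 4, TYP_INT

        unsigned char slots[8];
        std::memcpy(slots + 0, &lo, 4);
        std::memcpy(slots + 4, &hi, 4);

        uint64_t reassembled;
        std::memcpy(&reassembled, slots, 8);
        std::printf("match: %d\n", reassembled == arg); // prints 1 on little-endian targets
        return 0;
    }
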
@@ -1872,6 +1863,7 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
bool isClosed;
LIR::ReadOnlyRange secondArgRange = BlockRange().GetTreeRange(arg0, &isClosed);
assert(isClosed);
+ BlockRange().Remove(std::move(secondArgRange));
argEntry->node->gtOp.gtOp1 = callTarget;
@@ -1935,251 +1927,439 @@ GenTree* Lowering::LowerTailCallViaHelper(GenTreeCall* call, GenTree* callTarget
}
//------------------------------------------------------------------------
-// Lowering::LowerCompare: lowers a compare node.
-//
-// For 64-bit targets, this doesn't do much of anything: all comparisons
-// that we support can be handled in code generation on such targets.
-//
-// For 32-bit targets, however, any comparison that feeds a `GT_JTRUE`
-// node must be lowered such that the liveness of the operands to the
-// comparison is properly visible to the rest of the backend. As such,
-// a 64-bit comparison is lowered from something like this:
-//
-// ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
-// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
-//
-// /--* t6 int
-// N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+// Lowering::LowerCompare: Lowers a compare node.
//
-// N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
-//
-// /--* t7 long
-// +--* t9 long
-// N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
-//
-// /--* t10 int
-// N005 ( 11, 19) [000011] ------------ * jmpTrue void
-//
-// To something like this:
-//
-// ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
-// [000099] ------------ t99 = const int 0
-//
-// [000101] ------------ t101 = const int 0
-//
-// /--* t99 int
-// +--* t101 int
-// N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
-//
-// /--* t10 int
-// N005 ( 11, 19) [000011] ------------ * jmpTrue void
-//
-//
-// ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
-// [000105] -------N-U-- jcc void cond=<
-//
-//
-// ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
-// N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
-//
-// N003 ( 3, 10) [000009] ------------ t9 = const int 3
-//
-// /--* t6 int
-// +--* t9 int
-// [000106] N------N-U-- t106 = * < int
-//
-// /--* t106 int
-// [000107] ------------ * jmpTrue void
-//
-// Which will eventually generate code similar to the following:
-//
-// 33DB xor ebx, ebx
-// 85DB test ebx, ebx
-// 7707 ja SHORT G_M50523_IG04
-// 72E7 jb SHORT G_M50523_IG03
-// 83F803 cmp eax, 3
-// 72E2 jb SHORT G_M50523_IG03
+// Arguments:
+// cmp - the compare node
//
+// Notes:
+// - Decomposes long comparisons that feed a GT_JTRUE (32 bit specific).
+// - Ensures that we don't have a mix of int/long operands (XARCH specific).
+// - Narrows operands to enable memory operand containment (XARCH specific).
+// - Transforms cmp(and(x, y), 0) into test(x, y) (XARCH specific but could
+// be used for ARM as well if support for GT_TEST_EQ/GT_TEST_NE is added).
+
void Lowering::LowerCompare(GenTree* cmp)
{
#ifndef _TARGET_64BIT_
- if (cmp->gtGetOp1()->TypeGet() != TYP_LONG)
- {
- return;
- }
-
LIR::Use cmpUse;
- if (!BlockRange().TryGetUse(cmp, &cmpUse) || cmpUse.User()->OperGet() != GT_JTRUE)
+ if ((cmp->gtGetOp1()->TypeGet() == TYP_LONG) && BlockRange().TryGetUse(cmp, &cmpUse) &&
+ cmpUse.User()->OperIs(GT_JTRUE))
{
- return;
- }
+ // For 32-bit targets any comparison that feeds a `GT_JTRUE` node must be lowered such that
+ // the liveness of the operands to the comparison is properly visible to the rest of the
+ // backend. As such, a 64-bit comparison is lowered from something like this:
+ //
+ // ------------ BB02 [004..014) -> BB02 (cond), preds={BB02,BB01} succs={BB03,BB02}
+ // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+ //
+ // /--* t6 int
+ // N002 ( 2, 3) [000007] ---------U-- t7 = * cast long <- ulong <- uint $3c0
+ //
+ // N003 ( 3, 10) [000009] ------------ t9 = lconst long 0x0000000000000003 $101
+ //
+ // /--* t7 long
+ // +--* t9 long
+ // N004 ( 9, 17) [000010] N------N-U-- t10 = * < int $149
+ //
+ // /--* t10 int
+ // N005 ( 11, 19) [000011] ------------ * jmpTrue void
+ //
+ // To something like this:
+ //
+ // ------------ BB02 [004..014) -> BB03 (cond), preds={BB06,BB07,BB01} succs={BB06,BB03}
+ // [000099] ------------ t99 = const int 0
+ //
+ // [000101] ------------ t101 = const int 0
+ //
+ // /--* t99 int
+ // +--* t101 int
+ // N004 ( 9, 17) [000010] N------N-U-- t10 = * > int $149
+ //
+ // /--* t10 int
+ // N005 ( 11, 19) [000011] ------------ * jmpTrue void
+ //
+ //
+ // ------------ BB06 [???..???) -> BB02 (cond), preds={BB02} succs={BB07,BB02}
+ // [000105] -------N-U-- jcc void cond=<
+ //
+ //
+ // ------------ BB07 [???..???) -> BB02 (cond), preds={BB06} succs={BB03,BB02}
+ // N001 ( 1, 1) [000006] ------------ t6 = lclVar int V02 loc0 u:5 $148
+ //
+ // N003 ( 3, 10) [000009] ------------ t9 = const int 3
+ //
+ // /--* t6 int
+ // +--* t9 int
+ // [000106] N------N-U-- t106 = * < int
+ //
+ // /--* t106 int
+ // [000107] ------------ * jmpTrue void
+ //
+ // Which will eventually generate code similar to the following:
+ //
+ // 33DB xor ebx, ebx
+ // 85DB test ebx, ebx
+ // 7707 ja SHORT G_M50523_IG04
+ // 72E7 jb SHORT G_M50523_IG03
+ // 83F803 cmp eax, 3
+ // 72E2 jb SHORT G_M50523_IG03
+ //
- GenTree* src1 = cmp->gtGetOp1();
- GenTree* src2 = cmp->gtGetOp2();
- unsigned weight = m_block->getBBWeight(comp);
+ GenTree* src1 = cmp->gtGetOp1();
+ GenTree* src2 = cmp->gtGetOp2();
+ unsigned weight = m_block->getBBWeight(comp);
- LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
- LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
+ LIR::Use loSrc1(BlockRange(), &(src1->gtOp.gtOp1), src1);
+ LIR::Use loSrc2(BlockRange(), &(src2->gtOp.gtOp1), src2);
- if (loSrc1.Def()->OperGet() != GT_CNS_INT && loSrc1.Def()->OperGet() != GT_LCL_VAR)
- {
- loSrc1.ReplaceWithLclVar(comp, weight);
- }
+ // TODO-CQ-32bit: We should move more code to the new basic block; currently we're only moving
+ // constants and lclvars. In particular, it would be nice to move GT_AND nodes as that would
+ // enable the and-cmp to test transform that happens later in this function. That's not
+ // exactly ideal, though; the and-cmp to test transform should really run before this code, but:
+ // - it would need to run before decomposition, otherwise it won't recognize the 0 constant
+ // because after decomposition it is packed in a GT_LONG
+ // - this code would also need to handle GT_TEST_EQ/GT_TEST_NE
- if (loSrc2.Def()->OperGet() != GT_CNS_INT && loSrc2.Def()->OperGet() != GT_LCL_VAR)
- {
- loSrc2.ReplaceWithLclVar(comp, weight);
- }
+ if (!loSrc1.Def()->OperIs(GT_CNS_INT, GT_LCL_VAR))
+ {
+ loSrc1.ReplaceWithLclVar(comp, weight);
+ }
+
+ if (!loSrc2.Def()->OperIs(GT_CNS_INT, GT_LCL_VAR))
+ {
+ loSrc2.ReplaceWithLclVar(comp, weight);
+ }
- BasicBlock* jumpDest = m_block->bbJumpDest;
- BasicBlock* nextDest = m_block->bbNext;
- BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
+ BasicBlock* jumpDest = m_block->bbJumpDest;
+ BasicBlock* nextDest = m_block->bbNext;
+ BasicBlock* newBlock = comp->fgSplitBlockAtEnd(m_block);
- cmp->gtType = TYP_INT;
- cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
- cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
+ cmp->gtType = TYP_INT;
+ cmp->gtOp.gtOp1 = src1->gtOp.gtOp2;
+ cmp->gtOp.gtOp2 = src2->gtOp.gtOp2;
- if (cmp->OperGet() == GT_EQ || cmp->OperGet() == GT_NE)
- {
- // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
- // bits and one for the lower 32 bits. As such, we update the flow graph like so:
- //
- // Before:
- // BB0: cond
- // / \
- // false true
- // | |
- // BB1 BB2
- //
- // After:
- // BB0: cond(hi)
- // / \
- // false true
- // | |
- // | BB3: cond(lo)
- // | / \
- // | false true
- // \ / |
- // BB1 BB2
- //
+ if (cmp->OperIs(GT_EQ, GT_NE))
+ {
+ // 64-bit equality comparisons (no matter the polarity) require two 32-bit comparisons: one for the upper 32
+ // bits and one for the lower 32 bits. As such, we update the flow graph like so:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: cond(hi)
+ // / \
+ // false true
+ // | |
+ // | BB3: cond(lo)
+ // | / \
+ // | false true
+ // \ / |
+ // BB1 BB2
+ //
- BlockRange().Remove(loSrc1.Def());
- BlockRange().Remove(loSrc2.Def());
- GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
- loCmp->gtFlags = cmp->gtFlags;
- GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
- LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(cmp->OperGet(), TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
- m_block->bbJumpKind = BBJ_COND;
+ m_block->bbJumpKind = BBJ_COND;
+
+ if (cmp->OperIs(GT_EQ))
+ {
+ cmp->gtOper = GT_NE;
+ m_block->bbJumpDest = nextDest;
+ nextDest->bbFlags |= BBF_JMP_TARGET;
+ comp->fgAddRefPred(nextDest, m_block);
+ }
+ else
+ {
+ m_block->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, m_block);
+ }
- if (cmp->OperGet() == GT_EQ)
+ assert(newBlock->bbJumpKind == BBJ_COND);
+ assert(newBlock->bbJumpDest == jumpDest);
+ }
+ else
{
- cmp->gtOper = GT_NE;
+ // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and
+ // one comparison for the lower 32 bits. We update the flowgraph as such:
+ //
+ // Before:
+ // BB0: cond
+ // / \
+ // false true
+ // | |
+ // BB1 BB2
+ //
+ // After:
+ // BB0: (!cond(hi) && !eq(hi))
+ // / \
+ // true false
+ // | |
+ // | BB3: (cond(hi) && !eq(hi))
+ // | / \
+ // | false true
+ // | | |
+ // | BB4: cond(lo) |
+ // | / \ |
+ // | false true |
+ // \ / \ /
+ // BB1 BB2
+ //
+ //
+ // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
+ // differ based on the original condition, and all consist of a single node. The switch statement below
+ // performs the necessary mapping.
+ //
+
+ genTreeOps hiCmpOper;
+ genTreeOps loCmpOper;
+
+ switch (cmp->OperGet())
+ {
+ case GT_LT:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LT;
+ break;
+ case GT_LE:
+ cmp->gtOper = GT_GT;
+ hiCmpOper = GT_LT;
+ loCmpOper = GT_LE;
+ break;
+ case GT_GT:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GT;
+ break;
+ case GT_GE:
+ cmp->gtOper = GT_LT;
+ hiCmpOper = GT_GT;
+ loCmpOper = GT_GE;
+ break;
+ default:
+ unreached();
+ }
+
+ BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+
+ GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
+ hiJcc->gtFlags = cmp->gtFlags;
+ LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+
+ BlockRange().Remove(loSrc1.Def());
+ BlockRange().Remove(loSrc2.Def());
+ GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
+ loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
+ GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
+ LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+
+ m_block->bbJumpKind = BBJ_COND;
m_block->bbJumpDest = nextDest;
nextDest->bbFlags |= BBF_JMP_TARGET;
comp->fgAddRefPred(nextDest, m_block);
+
+ newBlock->bbJumpKind = BBJ_COND;
+ newBlock->bbJumpDest = jumpDest;
+ comp->fgAddRefPred(jumpDest, newBlock);
+
+ assert(newBlock2->bbJumpKind == BBJ_COND);
+ assert(newBlock2->bbJumpDest == jumpDest);
}
- else
+
+ BlockRange().Remove(src1);
+ BlockRange().Remove(src2);
+ }
+#endif
+
+#ifdef _TARGET_XARCH_
+#ifdef _TARGET_AMD64_
+ if (cmp->gtGetOp1()->TypeGet() != cmp->gtGetOp2()->TypeGet())
+ {
+ bool op1Is64Bit = (genTypeSize(cmp->gtGetOp1()->TypeGet()) == 8);
+ bool op2Is64Bit = (genTypeSize(cmp->gtGetOp2()->TypeGet()) == 8);
+
+ if (op1Is64Bit != op2Is64Bit)
{
- m_block->bbJumpDest = jumpDest;
- comp->fgAddRefPred(jumpDest, m_block);
- }
+ //
+ // Normally this should not happen. IL allows comparing int32 to native int but the importer
+ // automatically inserts a cast from int32 to long on 64 bit architectures. However, the JIT
+ // accidentally generates int/long comparisons internally:
+ // - loop cloning compares int (and even small int) index limits against long constants
+ // - switch lowering compares a 64 bit switch value against an int32 constant
+ //
+ // TODO-Cleanup: The above mentioned issues should be fixed and then the code below may be
+ // replaced with an assert or at least simplified. The special casing of constants in code
+ // below is only necessary to prevent worse code generation for switches and loop cloning.
+ //
- assert(newBlock->bbJumpKind == BBJ_COND);
- assert(newBlock->bbJumpDest == jumpDest);
+ GenTree* longOp = op1Is64Bit ? cmp->gtOp.gtOp1 : cmp->gtOp.gtOp2;
+ GenTree** smallerOpUse = op2Is64Bit ? &cmp->gtOp.gtOp1 : &cmp->gtOp.gtOp2;
+ var_types smallerType = (*smallerOpUse)->TypeGet();
+
+ assert(genTypeSize(smallerType) < 8);
+
+ if (longOp->IsCnsIntOrI() && genTypeCanRepresentValue(smallerType, longOp->AsIntCon()->IconValue()))
+ {
+ longOp->gtType = smallerType;
+ }
+ else if ((*smallerOpUse)->IsCnsIntOrI())
+ {
+ (*smallerOpUse)->gtType = TYP_LONG;
+ }
+ else
+ {
+ GenTree* cast = comp->gtNewCastNode(TYP_LONG, *smallerOpUse, TYP_LONG);
+ *smallerOpUse = cast;
+ BlockRange().InsertAfter(cast->gtGetOp1(), cast);
+ }
+ }
}
- else
+#endif // _TARGET_AMD64_
+
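
The constant special-casing above comes down to whether the 64-bit constant is representable in the smaller operand's type; if it is, the constant can simply be retyped, otherwise the non-constant operand gets a widening cast. A hedged sketch of that representability check for a few integer types (the JIT's genTypeCanRepresentValue covers the full var_types set; this is only an approximation):

    // Illustrative sketch only: mimics the "does this constant fit?" decision for a
    // few small integer types.
    #include <cstdint>

    enum class SmallType { Int8, UInt8, Int16, UInt16, Int32, UInt32 };

    bool CanRepresent(SmallType type, int64_t value)
    {
        switch (type)
        {
            case SmallType::Int8:   return value >= INT8_MIN  && value <= INT8_MAX;
            case SmallType::UInt8:  return value >= 0         && value <= UINT8_MAX;
            case SmallType::Int16:  return value >= INT16_MIN && value <= INT16_MAX;
            case SmallType::UInt16: return value >= 0         && value <= UINT16_MAX;
            case SmallType::Int32:  return value >= INT32_MIN && value <= INT32_MAX;
            case SmallType::UInt32: return value >= 0         && value <= UINT32_MAX;
        }
        return false;
    }

    // Example: a loop-cloning limit of 3 compared against an int index can simply be
    // retyped to the int side; a constant like 0x100000000 cannot, so the int operand
    // would get a widening cast instead.
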
+ if (cmp->gtGetOp2()->IsIntegralConst())
{
- // 64-bit ordinal comparisons are more complicated: they require two comparisons for the upper 32 bits and one
- // comparison for the lower 32 bits. We update the flowgraph as such:
- //
- // Before:
- // BB0: cond
- // / \
- // false true
- // | |
- // BB1 BB2
- //
- // After:
- // BB0: (!cond(hi) && !eq(hi))
- // / \
- // true false
- // | |
- // | BB3: (cond(hi) && !eq(hi))
- // | / \
- // | false true
- // | | |
- // | BB4: cond(lo) |
- // | / \ |
- // | false true |
- // \ / \ /
- // BB1 BB2
- //
- //
- // Note that the actual comparisons used to implement "(!cond(hi) && !eq(hi))" and "(cond(hi) && !eq(hi))"
- // differ based on the original condition, and all consist of a single node. The switch statement below
- // performs the necessary mapping.
- //
+ GenTree* op1 = cmp->gtGetOp1();
+ var_types op1Type = op1->TypeGet();
+ GenTreeIntCon* op2 = cmp->gtGetOp2()->AsIntCon();
+ ssize_t op2Value = op2->IconValue();
- genTreeOps hiCmpOper;
- genTreeOps loCmpOper;
+ if (op1->isMemoryOp() && varTypeIsSmall(op1Type) && genTypeCanRepresentValue(op1Type, op2Value))
+ {
+ //
+ // If op1's type is small then try to narrow op2 so it has the same type as op1.
+ // Small types are usually used by memory loads and if both compare operands have
+ // the same type then the memory load can be contained. In certain situations
+ // (e.g "cmp ubyte, 200") we also get a smaller instruction encoding.
+ //
- switch (cmp->OperGet())
+ op2->gtType = op1Type;
+ }
+ else if (op1->OperIs(GT_CAST) && !op1->gtOverflow())
{
- case GT_LT:
- cmp->gtOper = GT_GT;
- hiCmpOper = GT_LT;
- loCmpOper = GT_LT;
- break;
- case GT_LE:
- cmp->gtOper = GT_GT;
- hiCmpOper = GT_LT;
- loCmpOper = GT_LE;
- break;
- case GT_GT:
- cmp->gtOper = GT_LT;
- hiCmpOper = GT_GT;
- loCmpOper = GT_GT;
- break;
- case GT_GE:
- cmp->gtOper = GT_LT;
- hiCmpOper = GT_GT;
- loCmpOper = GT_GE;
- break;
- default:
- unreached();
+ GenTreeCast* cast = op1->AsCast();
+ var_types castToType = cast->CastToType();
+ GenTree* castOp = cast->gtGetOp1();
+
+ if (((castToType == TYP_BOOL) || (castToType == TYP_UBYTE)) && FitsIn<UINT8>(op2Value))
+ {
+ //
+ // Since we're going to remove the cast we need to be able to narrow the cast operand
+ // to the cast type. This can be done safely only for certain opers (e.g. AND, OR, XOR).
+ // Some opers just can't be narrowed (e.g. DIV, MUL) while others could be narrowed but
+ // doing so would produce incorrect results (e.g. RSZ, RSH).
+ //
+ // The below list of handled opers is conservative but enough to handle the most common
+ // situations. In particular this includes CALL; sometimes the JIT unnecessarily widens
+ // the result of bool-returning calls.
+ //
+
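
The safety argument in the comment above can be checked directly: truncating the operands of a logical op before the operation yields the same low byte as truncating its full-width result, whereas a right shift does not survive the same treatment. A small standalone illustration, not taken from the JIT:

    // Illustrative sketch: logical ops commute with truncation to uint8_t, shifts do not.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint32_t x = 0x1234;
        uint32_t y = 0x00F0;

        // AND: truncating the inputs first gives the same byte as truncating the result.
        bool andOk = (uint8_t)((uint8_t)x & (uint8_t)y) == (uint8_t)(x & y);

        // Right shift: truncating the input first discards the bits the shift would
        // have brought down, so the results differ.
        bool shrOk = (uint8_t)((uint8_t)x >> 4) == (uint8_t)(x >> 4);

        std::printf("AND narrows safely: %d, SHR narrows safely: %d\n", andOk, shrOk);
        return 0; // prints 1 and 0
    }
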
+ if (castOp->OperIs(GT_CALL, GT_LCL_VAR) || castOp->OperIsLogical() || castOp->isMemoryOp())
+ {
+ assert(!castOp->gtOverflowEx()); // Must not be an overflow checking operation
+
+ castOp->gtType = castToType;
+ cmp->gtOp.gtOp1 = castOp;
+ op2->gtType = castToType;
+
+ BlockRange().Remove(cast);
+ }
+ }
}
+ else if (op1->OperIs(GT_AND) && cmp->OperIs(GT_EQ, GT_NE))
+ {
+ //
+ // Transform ((x AND y) EQ|NE 0) into (x TEST_EQ|TEST_NE y) when possible.
+ //
- BasicBlock* newBlock2 = comp->fgSplitBlockAtEnd(newBlock);
+ GenTree* andOp1 = op1->gtGetOp1();
+ GenTree* andOp2 = op1->gtGetOp2();
- GenTree* hiJcc = new (comp, GT_JCC) GenTreeJumpCC(hiCmpOper);
- hiJcc->gtFlags = cmp->gtFlags;
- LIR::AsRange(newBlock).InsertAfter(nullptr, hiJcc);
+ if (op2Value != 0)
+ {
+ //
+ // If we don't have a 0 compare we can get one by transforming ((x AND mask) EQ|NE mask)
+ // into ((x AND mask) NE|EQ 0) when mask is a single bit.
+ //
- BlockRange().Remove(loSrc1.Def());
- BlockRange().Remove(loSrc2.Def());
- GenTree* loCmp = comp->gtNewOperNode(loCmpOper, TYP_INT, loSrc1.Def(), loSrc2.Def());
- loCmp->gtFlags = cmp->gtFlags | GTF_UNSIGNED;
- GenTree* loJtrue = comp->gtNewOperNode(GT_JTRUE, TYP_VOID, loCmp);
- LIR::AsRange(newBlock2).InsertAfter(nullptr, loSrc1.Def(), loSrc2.Def(), loCmp, loJtrue);
+ if (isPow2(static_cast<size_t>(op2Value)) && andOp2->IsIntegralConst(op2Value))
+ {
+ op2Value = 0;
+ op2->SetIconValue(0);
+ cmp->SetOperRaw(GenTree::ReverseRelop(cmp->OperGet()));
+ }
+ }
- m_block->bbJumpKind = BBJ_COND;
- m_block->bbJumpDest = nextDest;
- nextDest->bbFlags |= BBF_JMP_TARGET;
- comp->fgAddRefPred(nextDest, m_block);
+ if (op2Value == 0)
+ {
+ BlockRange().Remove(op1);
+ BlockRange().Remove(op2);
- newBlock->bbJumpKind = BBJ_COND;
- newBlock->bbJumpDest = jumpDest;
- comp->fgAddRefPred(jumpDest, newBlock);
+ cmp->SetOperRaw(cmp->OperIs(GT_EQ) ? GT_TEST_EQ : GT_TEST_NE);
+ cmp->gtOp.gtOp1 = andOp1;
+ cmp->gtOp.gtOp2 = andOp2;
- assert(newBlock2->bbJumpKind == BBJ_COND);
- assert(newBlock2->bbJumpDest == jumpDest);
+ if (andOp1->isMemoryOp() && andOp2->IsIntegralConst())
+ {
+ //
+ // For "test" we only care about the bits that are set in the second operand (mask).
+ // If the mask fits in a small type then we can narrow both operands to generate a "test"
+ // instruction with a smaller encoding ("test" does not have a r/m32, imm8 form) and avoid
+ // a widening load in some cases.
+ //
+ // For 16 bit operands we narrow only if the memory operand is already 16 bit. This matches
+ // the behavior of a previous implementation and avoids adding more cases where we generate
+ // 16 bit instructions that require a length changing prefix (0x66). These suffer from
+ // significant decoder stalls on Intel CPUs.
+ //
+ // We could also do this for 64 bit masks that fit into 32 bit but it doesn't help.
+ // In such cases morph narrows down the existing GT_AND by inserting a cast between it and
+ // the memory operand so we'd need to add more code to recognize and eliminate that cast.
+ //
+
+ size_t mask = static_cast<size_t>(andOp2->AsIntCon()->IconValue());
+
+ if (FitsIn<UINT8>(mask))
+ {
+ andOp1->gtType = TYP_UBYTE;
+ andOp2->gtType = TYP_UBYTE;
+ }
+ else if (FitsIn<UINT16>(mask) && genTypeSize(andOp1) == 2)
+ {
+ andOp1->gtType = TYP_CHAR;
+ andOp2->gtType = TYP_CHAR;
+ }
+ }
+ }
+ }
}
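
Two facts drive the transform above: a compare of (x AND y) against zero is exactly what the x86 test instruction computes, and for a single-bit mask the predicate ((x AND mask) EQ mask) is the same as ((x AND mask) NE 0), which is what lets the non-zero case be rewritten first. A quick standalone check of the single-bit identity:

    // Illustrative sketch: exhaustively checks that, for a single-bit mask,
    // ((x & mask) == mask) and ((x & mask) != 0) agree on all byte values, which is
    // what allows the compare-against-mask to become a compare-against-zero (and then
    // a TEST_EQ/TEST_NE).
    #include <cstdio>

    int main()
    {
        bool ok = true;
        for (unsigned x = 0; x < 256; x++)
        {
            for (int bit = 0; bit < 8; bit++)
            {
                unsigned mask = 1u << bit;
                ok &= (((x & mask) == mask) == ((x & mask) != 0));
            }
        }
        std::printf("identity holds for all bytes: %d\n", ok);
        return 0;
    }
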
- BlockRange().Remove(src1);
- BlockRange().Remove(src2);
-#endif
+ if (cmp->gtGetOp1()->TypeGet() == cmp->gtGetOp2()->TypeGet())
+ {
+ if (varTypeIsSmall(cmp->gtGetOp1()->TypeGet()) && varTypeIsUnsigned(cmp->gtGetOp1()->TypeGet()))
+ {
+ //
+ // If both operands have the same type then codegen will use the common operand type to
+ // determine the instruction type. For small types this would result in performing a
+ // signed comparison of two small unsigned values without zero extending them to TYP_INT
+ // which is incorrect. Note that making the comparison unsigned doesn't imply that codegen
+ // has to generate a small comparison, it can still correctly generate a TYP_INT comparison.
+ //
+
+ cmp->gtFlags |= GTF_UNSIGNED;
+ }
+ }
+#endif // _TARGET_XARCH_
}
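
The GTF_UNSIGNED fix-up at the end of LowerCompare is easy to motivate with concrete values: two TYP_UBYTE operands compared as signed bytes order incorrectly once the high bit is set. A small demonstration, independent of the JIT:

    // Illustrative sketch: 200 and 100 as unsigned bytes, compared both ways.
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        uint8_t a = 200;
        uint8_t b = 100;

        bool unsignedLess = a < b;                 // false: 200 is not less than 100
        bool signedLess   = (int8_t)a < (int8_t)b; // true: 200 reinterpreted as -56

        std::printf("unsigned: %d, signed-byte: %d\n", unsignedLess, signedLess);
        return 0; // a signed byte compare flips the result, hence GTF_UNSIGNED
    }
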
// Lower "jmp <method>" tail call to insert PInvoke method epilog if required.
@@ -3498,18 +3678,19 @@ GenTree* Lowering::TryCreateAddrMode(LIR::Use&& use, bool isIndir)
// make sure there are not any side effects between def of leaves and use
if (!doAddrMode || AreSourcesPossiblyModifiedLocals(addr, base, index))
{
- JITDUMP(" No addressing mode\n");
+ JITDUMP("No addressing mode:\n ");
+ DISPNODE(addr);
return addr;
}
GenTreePtr arrLength = nullptr;
JITDUMP("Addressing mode:\n");
- JITDUMP(" Base\n");
+ JITDUMP(" Base\n ");
DISPNODE(base);
if (index != nullptr)
{
- JITDUMP(" + Index * %u + %u\n", scale, offset);
+ JITDUMP(" + Index * %u + %u\n ", scale, offset);
DISPNODE(index);
}
else
@@ -4023,12 +4204,6 @@ void Lowering::LowerStoreInd(GenTree* node)
node->AsStoreInd()->SetRMWStatusDefault();
}
-void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
-{
- GenTree* src = blkNode->Data();
- TryCreateAddrMode(LIR::Use(BlockRange(), &blkNode->Addr(), blkNode), false);
-}
-
//------------------------------------------------------------------------
// LowerArrElem: Lower a GT_ARR_ELEM node
//
@@ -4303,13 +4478,12 @@ void Lowering::DoPhase()
m_block = block;
for (GenTree* node : BlockRange().NonPhiNodes())
{
-/* We increment the number position of each tree node by 2 to
-* simplify the logic when there's the case of a tree that implicitly
-* does a dual-definition of temps (the long case). In this case
-* is easier to already have an idle spot to handle a dual-def instead
-* of making some messy adjustments if we only increment the
-* number position by one.
-*/
+ // We increment the number position of each tree node by 2 to simplify the logic when there's the case of
+ // a tree that implicitly does a dual-definition of temps (the long case). In this case it is easier to
+ // already have an idle spot to handle a dual-def instead of making some messy adjustments if we only
+ // increment the number position by one.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
#ifdef DEBUG
node->gtSeqNum = currentLoc;
#endif
@@ -4633,13 +4807,8 @@ bool Lowering::NodesAreEquivalentLeaves(GenTreePtr tree1, GenTreePtr tree2)
}
}
-#ifdef _TARGET_64BIT_
/**
* Get common information required to handle a cast instruction
- *
- * Right now only supports 64 bit targets. In order to support 32 bit targets the
- * switch statement needs work.
- *
*/
void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
{
@@ -4675,7 +4844,6 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
bool signCheckOnly = false;
// Do we need to compare the value, or just check masks
-
switch (dstType)
{
default:
@@ -4709,9 +4877,13 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
else
{
+#ifdef _TARGET_64BIT_
typeMask = 0xFFFFFFFF80000000LL;
- typeMin = INT_MIN;
- typeMax = INT_MAX;
+#else
+ typeMask = 0x80000000;
+#endif
+ typeMin = INT_MIN;
+ typeMax = INT_MAX;
}
break;
@@ -4722,7 +4894,11 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
else
{
+#ifdef _TARGET_64BIT_
typeMask = 0xFFFFFFFF00000000LL;
+#else
+ typeMask = 0x00000000;
+#endif
}
break;
@@ -4746,8 +4922,6 @@ void Lowering::getCastDescription(GenTreePtr treeNode, CastInfo* castInfo)
}
}
-#endif // _TARGET_64BIT_
-
#ifdef DEBUG
void Lowering::DumpNodeInfoMap()
{
diff --git a/src/jit/lower.h b/src/jit/lower.h
index c1cafb4ee8..57b4127f26 100644
--- a/src/jit/lower.h
+++ b/src/jit/lower.h
@@ -45,9 +45,7 @@ public:
bool signCheckOnly; // For converting between unsigned/signed int
};
-#ifdef _TARGET_64BIT_
static void getCastDescription(GenTreePtr treeNode, CastInfo* castInfo);
-#endif // _TARGET_64BIT_
private:
#ifdef DEBUG
@@ -168,7 +166,7 @@ private:
// operands.
//
// Arguments:
- // tree - Gentree of a bininary operation.
+ // tree - GenTree node of a binary operation.
//
// Returns
// None.
@@ -194,6 +192,7 @@ private:
}
}
#endif // defined(_TARGET_XARCH_)
+ void TreeNodeInfoInitStoreLoc(GenTree* tree);
void TreeNodeInfoInitReturn(GenTree* tree);
void TreeNodeInfoInitShiftRotate(GenTree* tree);
void TreeNodeInfoInitCall(GenTreeCall* call);
@@ -203,14 +202,26 @@ private:
void TreeNodeInfoInitLogicalOp(GenTree* tree);
void TreeNodeInfoInitModDiv(GenTree* tree);
void TreeNodeInfoInitIntrinsic(GenTree* tree);
+ void TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* tree);
+ void TreeNodeInfoInitIndir(GenTree* indirTree);
+ void TreeNodeInfoInitGCWriteBarrier(GenTree* tree);
+#if !CPU_LOAD_STORE_ARCH
+ bool TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd);
+#endif
#ifdef FEATURE_SIMD
void TreeNodeInfoInitSIMD(GenTree* tree);
#endif // FEATURE_SIMD
void TreeNodeInfoInitCast(GenTree* tree);
#ifdef _TARGET_ARM64_
+ void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
+ void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
+#endif // _TARGET_ARM64_
+#ifdef _TARGET_ARM_
+ void LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info);
#endif // _TARGET_ARM64_
#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ void LowerPutArgStk(GenTreePutArgStk* tree);
void TreeNodeInfoInitPutArgStk(GenTreePutArgStk* tree);
#endif // FEATURE_PUT_STRUCT_ARG_STK
void TreeNodeInfoInitLclHeap(GenTree* tree);
@@ -231,18 +242,16 @@ private:
void LowerCast(GenTree* node);
#if defined(_TARGET_XARCH_)
- void SetMulOpCounts(GenTreePtr tree);
+ void TreeNodeInfoInitMul(GenTreePtr tree);
+ void SetContainsAVXFlags(bool isFloatingPointType = true, unsigned sizeOfSIMDVector = 0);
#endif // defined(_TARGET_XARCH_)
#if !CPU_LOAD_STORE_ARCH
bool IsRMWIndirCandidate(GenTree* operand, GenTree* storeInd);
bool IsBinOpInRMWStoreInd(GenTreePtr tree);
bool IsRMWMemOpRootedAtStoreInd(GenTreePtr storeIndTree, GenTreePtr* indirCandidate, GenTreePtr* indirOpSource);
- bool SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd);
#endif
void LowerStoreLoc(GenTreeLclVarCommon* tree);
- void SetIndirAddrOpCounts(GenTree* indirTree);
- void LowerGCWriteBarrier(GenTree* tree);
GenTree* LowerArrElem(GenTree* node);
void LowerRotate(GenTree* tree);
diff --git a/src/jit/lowerarm.cpp b/src/jit/lowerarm.cpp
index 5bf23c4199..9792b8a9c6 100644
--- a/src/jit/lowerarm.cpp
+++ b/src/jit/lowerarm.cpp
@@ -23,8 +23,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
-// The ARM backend is not yet implemented, so the methods here are all NYI.
-// TODO-ARM-NYI: Lowering for ARM.
#ifdef _TARGET_ARM_
#include "jit.h"
@@ -33,6 +31,68 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "lsra.h"
//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Widening operations of unsigneds.
+//
+void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ // Try to widen the ops if they are going into a local var.
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
+ {
+ GenTreeIntCon* con = op1->AsIntCon();
+ ssize_t ival = con->gtIconVal;
+ unsigned varNum = storeLoc->gtLclNum;
+ LclVarDsc* varDsc = comp->lvaTable + varNum;
+
+ if (varDsc->lvIsSIMDType())
+ {
+ noway_assert(storeLoc->gtType != TYP_STRUCT);
+ }
+ unsigned size = genTypeSize(storeLoc);
+ // If we are storing a constant into a local variable,
+ // we extend the size of the store here.
+ if ((size < 4) && !varTypeIsStruct(varDsc))
+ {
+ if (!varTypeIsUnsigned(varDsc))
+ {
+ if (genTypeSize(storeLoc) == 1)
+ {
+ if ((ival & 0x7f) != ival)
+ {
+ ival = ival | 0xffffff00;
+ }
+ }
+ else
+ {
+ assert(genTypeSize(storeLoc) == 2);
+ if ((ival & 0x7fff) != ival)
+ {
+ ival = ival | 0xffff0000;
+ }
+ }
+ }
+
+ // A local stack slot is at least 4 bytes in size, regardless of
+ // what the local var is typed as, so auto-promote it here
+ // unless it is a field of a promoted struct
+ // TODO-ARM-CQ: if the field is promoted shouldn't we also be able to do this?
+ if (!varDsc->lvIsStructField)
+ {
+ storeLoc->gtType = TYP_INT;
+ con->SetIconValue(ival);
+ }
+ }
+ }
+}
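
The constant widening above amounts to manual sign extension of the immediate (the ival | 0xffffff00 and ival | 0xffff0000 adjustments) so that the lowered 32-bit store writes the same value an ordinary small typed store would. The same bit manipulation in isolation, as a standalone sketch:

    // Illustrative sketch: sign-extend an 8-bit immediate to 32 bits the same way the
    // constant widening above does, then compare with a plain sign-extending cast.
    #include <cstdint>
    #include <cstdio>

    int32_t WidenByte(int32_t ival)
    {
        if ((ival & 0x7f) != ival) // negative when viewed as a signed byte
        {
            ival = ival | 0xffffff00;
        }
        return ival;
    }

    int main()
    {
        for (int v = -128; v <= 127; v++)
        {
            int32_t stored = WidenByte(v & 0xff); // what the lowered 32-bit store holds
            int32_t expect = (int32_t)(int8_t)v;  // ordinary sign extension
            if (stored != expect)
            {
                std::printf("mismatch at %d\n", v);
                return 1;
            }
        }
        std::printf("byte widening matches sign extension\n");
        return 0;
    }
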
+
+//------------------------------------------------------------------------
// LowerCast: Lower GT_CAST(srcType, DstType) nodes.
//
// Arguments:
@@ -57,7 +117,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// Note that for the overflow conversions we still depend on helper calls and
// don't expect to see them here.
// i) GT_CAST(float/double, int type with overflow detection)
-
+//
void Lowering::LowerCast(GenTree* tree)
{
assert(tree->OperGet() == GT_CAST);
@@ -71,10 +131,8 @@ void Lowering::LowerCast(GenTree* tree)
var_types srcType = op1->TypeGet();
var_types tmpType = TYP_UNDEF;
- // TODO-ARM-Cleanup: Remove following NYI assertions.
if (varTypeIsFloating(srcType))
{
- NYI_ARM("Lowering for cast from float"); // Not tested yet.
noway_assert(!tree->gtOverflow());
}
@@ -104,36 +162,78 @@ void Lowering::LowerCast(GenTree* tree)
}
}
+//------------------------------------------------------------------------
+// LowerRotate: Lower GT_ROL and GT_ROR nodes.
+//
+// Arguments:
+// tree - the node to lower
+//
+// Return Value:
+// None.
+//
void Lowering::LowerRotate(GenTreePtr tree)
{
- NYI_ARM("ARM Lowering for ROL and ROR");
-}
+ if (tree->OperGet() == GT_ROL)
+ {
+ // There is no ROL instruction on ARM. Convert ROL into ROR.
+ GenTreePtr rotatedValue = tree->gtOp.gtOp1;
+ unsigned rotatedValueBitSize = genTypeSize(rotatedValue->gtType) * 8;
+ GenTreePtr rotateLeftIndexNode = tree->gtOp.gtOp2;
-void Lowering::TreeNodeInfoInit(GenTree* stmt)
-{
- NYI("ARM TreeNodInfoInit");
+ if (rotateLeftIndexNode->IsCnsIntOrI())
+ {
+ ssize_t rotateLeftIndex = rotateLeftIndexNode->gtIntCon.gtIconVal;
+ ssize_t rotateRightIndex = rotatedValueBitSize - rotateLeftIndex;
+ rotateLeftIndexNode->gtIntCon.gtIconVal = rotateRightIndex;
+ }
+ else
+ {
+ GenTreePtr tmp =
+ comp->gtNewOperNode(GT_NEG, genActualType(rotateLeftIndexNode->gtType), rotateLeftIndexNode);
+ BlockRange().InsertAfter(rotateLeftIndexNode, tmp);
+ tree->gtOp.gtOp2 = tmp;
+ }
+ tree->ChangeOper(GT_ROR);
+ }
}
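
The ROL-to-ROR rewrite relies on rotl(x, n) being equal to rotr(x, 32 - n); since the rotate amount is taken modulo the bit size, 32 - n is congruent to -n, which is why the non-constant case only needs a GT_NEG. A quick standalone check of the identity:

    // Illustrative sketch: rotate-left by n equals rotate-right by (32 - n), and
    // (32 - n) is congruent to -n modulo 32, which is why a negation suffices above.
    #include <cstdint>
    #include <cstdio>

    uint32_t RotL(uint32_t x, unsigned n) { return (x << (n & 31)) | (x >> ((32 - n) & 31)); }
    uint32_t RotR(uint32_t x, unsigned n) { return (x >> (n & 31)) | (x << ((32 - n) & 31)); }

    int main()
    {
        uint32_t x  = 0xDEADBEEF;
        bool     ok = true;
        for (unsigned n = 0; n < 32; n++)
        {
            ok &= (RotL(x, n) == RotR(x, (32 - n) & 31));
            ok &= (RotL(x, n) == RotR(x, (0u - n) & 31)); // the negation form used above
        }
        std::printf("rotate identity holds: %d\n", ok);
        return 0;
    }
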
-// returns true if the tree can use the read-modify-write memory instruction form
-bool Lowering::isRMWRegOper(GenTreePtr tree)
+//------------------------------------------------------------------------
+// LowerPutArgStk: Lower a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// There is currently no Lowering required for this on ARM.
+//
+void Lowering::LowerPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
{
- return false;
}
+//------------------------------------------------------------------------
+// IsCallTargetInRange: Can a call target address be encoded in-place?
+//
+// Return Value:
+// True if the addr fits into the range.
+//
bool Lowering::IsCallTargetInRange(void* addr)
{
return comp->codeGen->validImmForBL((ssize_t)addr);
}
-// return true if the immediate can be folded into an instruction, for example small enough and non-relocatable
+//------------------------------------------------------------------------
+// IsContainableImmed: Is an immediate encodable in-place?
+//
+// Return Value:
+// True if the immediate can be folded into an instruction,
+// for example because it is small enough and non-relocatable.
bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
{
if (varTypeIsFloating(parentNode->TypeGet()))
{
- // TODO-ARM-Cleanup: not tested yet.
- NYI_ARM("ARM IsContainableImmed for floating point type");
-
- // We can contain a floating point 0.0 constant in a compare instruction
switch (parentNode->OperGet())
{
default:
@@ -146,7 +246,12 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
case GT_GE:
case GT_GT:
if (childNode->IsIntegralConst(0))
+ {
+ // TODO-ARM-Cleanup: not tested yet.
+ NYI_ARM("ARM IsContainableImmed for floating point type");
+ // We can contain a floating point 0.0 constant in a compare instruction
return true;
+ }
break;
}
}
@@ -185,13 +290,6 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode)
if (emitter::emitIns_valid_imm_for_alu(immVal))
return true;
break;
-
- case GT_STORE_LCL_VAR:
- // TODO-ARM-Cleanup: not tested yet
- NYI_ARM("ARM IsContainableImmed for GT_STORE_LCL_VAR");
- if (immVal == 0)
- return true;
- break;
}
}
diff --git a/src/jit/lowerarm64.cpp b/src/jit/lowerarm64.cpp
index cc9e2266d2..f5bc55e10c 100644
--- a/src/jit/lowerarm64.cpp
+++ b/src/jit/lowerarm64.cpp
@@ -29,34 +29,20 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "sideeffects.h"
#include "lower.h"
-// there is not much lowering to do with storing a local but
-// we do some handling of contained immediates and widening operations of unsigneds
+//------------------------------------------------------------------------
+// LowerStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Widening operations of unsigneds.
+
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
- GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
- CheckImmedAndMakeContained(storeLoc, op1);
-
// Try to widen the ops if they are going into a local var.
+ GenTree* op1 = storeLoc->gtGetOp1();
if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (op1->gtOper == GT_CNS_INT))
{
GenTreeIntCon* con = op1->AsIntCon();
@@ -105,1120 +91,8 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-/**
- * Takes care of annotating the register requirements
- * for every TreeNodeInfo struct that maps to each tree node.
- * Preconditions:
- * LSRA has been initialized and there is a TreeNodeInfo node
- * already allocated and initialized for every tree in the IR.
- * Postconditions:
- * Every TreeNodeInfo instance has the right annotations on register
- * requirements needed by LSRA to build the Interval Table (source,
- * destination and internal [temp] register counts).
- * This code is refactored originally from LSRA.
- */
-void Lowering::TreeNodeInfoInit(GenTree* tree)
-{
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- unsigned kind = tree->OperKind();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- RegisterType registerType = TypeGet(tree);
-
- JITDUMP("TreeNodeInfoInit for: ");
- DISPNODE(tree);
- JITDUMP("\n");
-
- switch (tree->OperGet())
- {
- GenTree* op1;
- GenTree* op2;
-
- default:
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
- break;
-
- case GT_STORE_LCL_FLD:
- case GT_STORE_LCL_VAR:
- info->srcCount = 1;
- info->dstCount = 0;
- LowerStoreLoc(tree->AsLclVarCommon());
- break;
-
- case GT_BOX:
- noway_assert(!"box should not exist here");
- // The result of 'op1' is also the final result
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_PHYSREGDST:
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_COMMA:
- {
- GenTreePtr firstOperand;
- GenTreePtr secondOperand;
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- firstOperand = tree->gtOp.gtOp2;
- secondOperand = tree->gtOp.gtOp1;
- }
- else
- {
- firstOperand = tree->gtOp.gtOp1;
- secondOperand = tree->gtOp.gtOp2;
- }
- if (firstOperand->TypeGet() != TYP_VOID)
- {
- firstOperand->gtLsraInfo.isLocalDefUse = true;
- firstOperand->gtLsraInfo.dstCount = 0;
- }
- if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
- {
- secondOperand->gtLsraInfo.isLocalDefUse = true;
- secondOperand->gtLsraInfo.dstCount = 0;
- }
- }
-
- __fallthrough;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- case GT_NO_OP:
- case GT_START_NONGC:
- case GT_PROF_HOOK:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_CNS_DBL:
- info->srcCount = 0;
- info->dstCount = 1;
- {
- GenTreeDblCon* dblConst = tree->AsDblCon();
- double constValue = dblConst->gtDblCon.gtDconVal;
-
- if (emitter::emitIns_valid_imm_for_fmov(constValue))
- {
- // Directly encode constant to instructions.
- }
- else
- {
- // Reserve int to load constant from memory (IF_LARGELDC)
- info->internalIntCount = 1;
- }
- }
- break;
-
- case GT_QMARK:
- case GT_COLON:
- info->srcCount = 0;
- info->dstCount = 0;
- unreached();
- break;
-
- case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
- break;
-
- case GT_RETFILT:
- if (tree->TypeGet() == TYP_VOID)
- {
- info->srcCount = 0;
- info->dstCount = 0;
- }
- else
- {
- assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- info->dstCount = 0;
-
- info->setSrcCandidates(l, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
- }
- break;
-
- case GT_NOP:
- // A GT_NOP is either a passthrough (if it is void, or if it has
- // a child), but must be considered to produce a dummy value if it
- // has a type but no child
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
- {
- info->dstCount = 1;
- }
- else
- {
- info->dstCount = 0;
- }
- break;
-
- case GT_JTRUE:
- info->srcCount = 0;
- info->dstCount = 0;
- l->clearDstCount(tree->gtOp.gtOp1);
- break;
-
- case GT_JMP:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_SWITCH:
- // This should never occur since switch nodes must not be visible at this
- // point in the JIT.
- info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
- noway_assert(!"Switch must be lowered at this point");
- break;
-
- case GT_JMPTABLE:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
- case GT_SWITCH_TABLE:
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_ASG:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ADD:
- case GT_SUB:
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // overflow operations aren't supported on float/double types.
- assert(!tree->gtOverflow());
-
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- break;
- }
-
- __fallthrough;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- info->srcCount = 2;
- info->dstCount = 1;
- // Check and make op2 contained (if it is a containable immediate)
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_RETURNTRAP:
- // this just turns into a compare of its child with an int
- // + a conditional call
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_MOD:
- case GT_UMOD:
- NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
- assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
- break;
-
- case GT_MUL:
- if (tree->gtOverflow())
- {
- // Need a register different from target reg to check for overflow.
- info->internalIntCount = 2;
- }
- __fallthrough;
-
- case GT_DIV:
- case GT_MULHI:
- case GT_UDIV:
- {
- info->srcCount = 2;
- info->dstCount = 1;
- }
- break;
-
- case GT_INTRINSIC:
- {
- // TODO-ARM64-NYI
- // Right now only Abs/Round/Sqrt are treated as math intrinsics
- noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
- (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
- (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
-
- // Both operand and its result must be of the same floating point type.
- op1 = tree->gtOp.gtOp1;
- assert(varTypeIsFloating(op1));
- assert(op1->TypeGet() == tree->TypeGet());
-
- info->srcCount = 1;
- info->dstCount = 1;
- }
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- TreeNodeInfoInitSIMD(tree);
- break;
-#endif // FEATURE_SIMD
-
- case GT_CAST:
- {
- // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
- // register.
- // see CodeGen::genIntToIntCast()
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Non-overflow casts to/from float/double are done using SSE2 instructions,
- // which allow the source operand to be either a reg or memop. Given the
- // fact that casts from small int to float/double are done as two-level casts,
- // the source operand is always guaranteed to be of size 4 or 8 bytes.
- var_types castToType = tree->CastToType();
- GenTreePtr castOp = tree->gtCast.CastOp();
- var_types castOpType = castOp->TypeGet();
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- castOpType = genUnsignedType(castOpType);
- }
-#ifdef DEBUG
- if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
- {
- // If converting to float/double, the operand must be 4 or 8 byte in size.
- if (varTypeIsFloating(castToType))
- {
- unsigned opSize = genTypeSize(castOpType);
- assert(opSize == 4 || opSize == 8);
- }
- }
-#endif // DEBUG
- // Some overflow checks need a temp reg
-
- CastInfo castInfo;
-
- // Get information about the cast.
- getCastDescription(tree, &castInfo);
-
- if (castInfo.requiresOverflowCheck)
- {
- var_types srcType = castOp->TypeGet();
- emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
-
- // If either the max or the min comparison value cannot be encoded
- // as an immediate, then we will need to reserve a temporary register.
-
- bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize);
- bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize);
-
- if (!canStoreMaxValue || !canStoreMinValue)
- {
- info->internalIntCount = 1;
- }
- }
- }
- break;
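To make the overflow-check reservation above concrete (the bounds are illustrative; the real decision is whatever emitter::emitIns_valid_imm_for_cmp accepts for the given compare size):

    // long -> sbyte : typeMin/typeMax are -128/127; both fit a cmp/cmn immediate, so no temp is needed.
    // long -> uint  : typeMax is 0xFFFFFFFF, which does not fit the 12-bit (optionally shifted)
    //                 immediate form, so one internal register is reserved to materialize the bound.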
-
- case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROR:
- {
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
- if (shiftBy->IsCnsIntOrI())
- {
- l->clearDstCount(shiftBy);
- info->srcCount--;
- }
- }
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- TreeNodeInfoInitCmp(tree);
- break;
-
- case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
- info->internalIntCount = 1;
- break;
-
- case GT_CMPXCHG:
- info->srcCount = 3;
- info->dstCount = 1;
-
- // TODO-ARM64-NYI
- NYI("CMPXCHG");
- break;
-
- case GT_LOCKADD:
- info->srcCount = 2;
- info->dstCount = 0;
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
- break;
-
- case GT_ADDR:
- {
- // For a GT_ADDR, the child node should not be evaluated into a register
- GenTreePtr child = tree->gtOp.gtOp1;
- assert(!l->isCandidateLocalRef(child));
- l->clearDstCount(child);
- info->srcCount = 0;
- info->dstCount = 1;
- }
- break;
-
- case GT_BLK:
- case GT_DYN_BLK:
- // These should all be eliminated prior to Lowering.
- assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
- case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
- break;
-
- case GT_INIT_VAL:
- // Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LCLHEAP:
- {
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
- // Here '-' means don't care.
- //
- // Size?                     Init Memory?    # temp regs
- // 0                         -               0
- // const and <=6 ptr words   -               0
- // const and <PageSize       No              0
- // >6 ptr words              Yes             hasPspSym ? 1 : 0
- // Non-const                 Yes             hasPspSym ? 1 : 0
- // Non-const                 No              2
- //
- // PSPSym - If the method has PSPSym increment internalIntCount by 1.
- //
- bool hasPspSym;
-#if FEATURE_EH_FUNCLETS
- hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
-#else
- hasPspSym = false;
-#endif
-
- GenTreePtr size = tree->gtOp.gtOp1;
- if (size->IsCnsIntOrI())
- {
- MakeSrcContained(tree, size);
-
- size_t sizeVal = size->gtIntCon.gtIconVal;
-
- if (sizeVal == 0)
- {
- info->internalIntCount = 0;
- }
- else
- {
- // Compute the amount of memory to properly STACK_ALIGN.
- // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
- // This should also help in debugging as we can examine the original size specified with
- // localloc.
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
- size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
-
- // For small allocations up to 4 'stp' instructions (i.e. 64 bytes of localloc)
- //
- if (cntStackAlignedWidthItems <= 4)
- {
- info->internalIntCount = 0;
- }
- else if (!compiler->info.compInitMem)
- {
- // No need to initialize allocated stack space.
- if (sizeVal < compiler->eeGetPageSize())
- {
- info->internalIntCount = 0;
- }
- else
- {
- // We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
- }
- }
- else
- {
- // greater than 4 and need to zero initialize allocated stack space.
- // If the method has PSPSym, we need an internal register to hold regCnt
- // since targetReg allocated to GT_LCLHEAP node could be the same as one of
- // the internal registers.
- info->internalIntCount = hasPspSym ? 1 : 0;
- }
- }
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- info->internalIntCount = 2;
- }
- else
- {
- // If the method has PSPSym, we need an internal register to hold regCnt
- // since targetReg allocated to GT_LCLHEAP node could be the same as one of
- // the internal registers.
- info->internalIntCount = hasPspSym ? 1 : 0;
- }
- }
-
- // If the method has PSPSym, we would need an additional register to relocate it on the stack.
- if (hasPspSym)
- {
- // Exclude const size 0
- if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
- info->internalIntCount++;
- }
- }
- break;
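The table and branches above are easier to check against a flattened restatement; the helper below is purely illustrative (a hypothetical function, not part of this change), with parameters that mirror the locals used above:

    unsigned lclHeapInternalIntCount(bool isConstSize, unsigned alignedChunks, // sizeVal >> STACK_ALIGN_SHIFT (const case only)
                                     bool initMem, bool hasPspSym, bool fitsInPage)
    {
        unsigned count = 0;
        if (isConstSize && (alignedChunks == 0))
        {
            return 0; // constant zero-sized localloc: no temps and no PSPSym adjustment
        }
        if (!isConstSize || (alignedChunks > 4)) // beyond the small 'stp' unroll
        {
            if (!initMem)
            {
                count = (isConstSize && fitsInPage) ? 0 : 2; // regCnt and regTmp
            }
            else
            {
                count = hasPspSym ? 1 : 0; // keep regCnt clear of the PSPSym registers
            }
        }
        if (hasPspSym)
        {
            count++; // one more register to relocate the PSPSym on the stack
        }
        return count;
    }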
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- {
- GenTreeBoundsChk* node = tree->AsBoundsChk();
- // Consumes arrLen & index - has no result
- info->srcCount = 2;
- info->dstCount = 0;
-
- GenTree* intCns = nullptr;
- GenTree* other = nullptr;
- if (CheckImmedAndMakeContained(tree, node->gtIndex))
- {
- intCns = node->gtIndex;
- other = node->gtArrLen;
- }
- else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
- {
- intCns = node->gtArrLen;
- other = node->gtIndex;
- }
- else
- {
- other = node->gtIndex;
- }
- }
- break;
-
- case GT_ARR_ELEM:
- // These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
-
- // We need one internal register when generating code for GT_ARR_INDEX; however, the
- // register allocator may just give us the same one as it gives us for the 'dst', so
- // as a workaround we will just ask for two internal registers.
- //
- info->internalIntCount = 2;
-
- // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
- // times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
-
- case GT_ARR_OFFSET:
- // This consumes the offset, if any, the arrObj and the effective index,
- // and produces the flattened offset for this dimension.
- info->srcCount = 3;
- info->dstCount = 1;
- info->internalIntCount = 1;
-
- // we don't want to generate code for this
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
- {
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
- }
- break;
-
- case GT_LEA:
- {
- GenTreeAddrMode* lea = tree->AsAddrMode();
-
- GenTree* base = lea->Base();
- GenTree* index = lea->Index();
- unsigned cns = lea->gtOffset;
-
- // This LEA is instantiating an address,
- // so we set up the srcCount and dstCount here.
- info->srcCount = 0;
- if (base != nullptr)
- {
- info->srcCount++;
- }
- if (index != nullptr)
- {
- info->srcCount++;
- }
- info->dstCount = 1;
-
- // On ARM64 we may need a single internal register
- // (even when both conditions below are true, we still only need a single internal register)
- if ((index != nullptr) && (cns != 0))
- {
- // ARM64 does not support both Index and offset so we need an internal register
- info->internalIntCount = 1;
- }
- else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
- {
- // This offset can't be contained in the add instruction, so we need an internal register
- info->internalIntCount = 1;
- }
- }
- break;
-
- case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
- if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
- {
- LowerGCWriteBarrier(tree);
- break;
- }
- if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
- {
- // an integer zero for 'src' can be contained.
- MakeSrcContained(tree, src);
- }
-
- SetIndirAddrOpCounts(tree);
- }
- break;
-
- case GT_NULLCHECK:
- info->dstCount = 0;
- info->srcCount = 1;
- info->isLocalDefUse = true;
- // null check is an indirection on an addr
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_CATCH_ARG:
- info->srcCount = 0;
- info->dstCount = 1;
- info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
- break;
-
- case GT_CLS_VAR:
- info->srcCount = 0;
- // GT_CLS_VAR, by the time we reach the backend, must always
- // be a pure use.
- // It will produce a result of the type of the
- // node, and use an internal register for the address.
-
- info->dstCount = 1;
- assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
- info->internalIntCount = 1;
- break;
- } // end switch (tree->OperGet())
-
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || tree->IsMultiRegCall());
-}
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- info->dstCount = 0;
-
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD))
- {
- GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- // Mark var as contained if not enregistrable.
- if (!varTypeIsEnregisterableStruct(op1))
- {
- MakeSrcContained(tree, op1);
- }
- }
- else
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- if (useCandidates != RBM_NONE)
- {
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
- {
- info->dstCount = 0;
- }
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
- info->srcCount++;
-
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (call->IsFastTailCall())
- {
- // Fast tail call - make sure that call target is always computed in IP0
- // so that epilog sequence can generate "br xip0" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
- }
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
- info->setDstCandidates(l, RBM_FLOATRET);
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
- else
- {
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
- // First, count reg args
- bool callHasFloatRegArgs = false;
-
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
- continue;
- }
-
- var_types argType = argNode->TypeGet();
- bool argIsFloat = varTypeIsFloating(argType);
- callHasFloatRegArgs |= argIsFloat;
-
- regNumber argReg = curArgTabEntry->regNum;
- // We will set up argMask to be the set of all registers that compose this argument
- regMaskTP argMask = 0;
-
- argNode = argNode->gtEffectiveVal();
-
- // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
- if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
- {
- GenTreePtr actualArgNode = argNode;
- unsigned originalSize = 0;
-
- if (argNode->gtOper == GT_FIELD_LIST)
- {
- // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
-
- // Initialize the first register and the first regmask in our list
- regNumber targetReg = argReg;
- regMaskTP targetMask = genRegMask(targetReg);
- unsigned iterationNum = 0;
- originalSize = 0;
-
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
- GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
-
- originalSize += REGSIZE_BYTES; // 8 bytes
-
- // Record the register requirements for the GT_PUTARG_REG node
- putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
-
- // We consume one source for each item in this list
- info->srcCount++;
- iterationNum++;
-
- // Update targetReg and targetMask for the next putarg_reg (if any)
- targetReg = genRegArgNext(targetReg);
- targetMask = genRegMask(targetReg);
- }
- }
- else
- {
-#ifdef DEBUG
- compiler->gtDispTreeRange(BlockRange(), argNode);
-#endif
- noway_assert(!"Unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
- regNumber curReg = argReg;
- regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
- unsigned remainingSlots = slots;
-
- while (remainingSlots > 0)
- {
- argMask |= genRegMask(curReg);
- remainingSlots--;
-
- if (curReg == lastReg)
- break;
-
- curReg = genRegArgNext(curReg);
- }
-
- // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
- noway_assert(remainingSlots == 0);
- argNode->gtLsraInfo.internalIntCount = 0;
- }
- else // A scalar argument (not a struct)
- {
- // We consume one source
- info->srcCount++;
-
- argMask |= genRegMask(argReg);
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // To avoid redundant moves, request that the argument child tree be
- // computed in the register in which the argument is passed to the call.
- putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
- }
- }
- }
-
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
-
- // Skip arguments that have been moved to the Late Arg list
- if (!(args->gtFlags & GTF_LATE_ARG))
- {
- if (arg->gtOper == GT_PUTARG_STK)
- {
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
- assert(curArgTabEntry);
-
- assert(curArgTabEntry->regNum == REG_STK);
-
- TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
- }
- else
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- argInfo->dstCount = 0;
- }
- }
- args = args->gtOp.gtOp2;
- }
-
- // If it is a fast tail call, it is already preferenced to use IP0.
- // Therefore, no need to set src candidates on call tgt again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Arm64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
-}
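As a hedged illustration of the register-argument walk above (the registers are only an example): a 16-byte struct passed in x0/x1 arrives as a two-element GT_FIELD_LIST of PUTARG_REG nodes; each PUTARG_REG and its child are given the candidates of their slot register (RBM_R0, then RBM_R1), info->srcCount goes up by two, and the slot walk leaves argMask == (RBM_R0 | RBM_R1) with remainingSlots == 0, i.e. no Reg/Stk split.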
-
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
-//
-// Arguments:
-// argNode - a GT_PUTARG_STK node
-//
-// Return Value:
-// None.
-//
-// Notes:
-// Set the child node(s) to be contained when we have a multireg arg
-//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
-{
- assert(argNode->gtOper == GT_PUTARG_STK);
-
- GenTreePtr putArgChild = argNode->gtOp.gtOp1;
-
- // Initialize 'argNode' as not contained, as this is both the default case
- // and how MakeSrcContained expects to find things set up.
- //
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
-
- // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST)? If so, it must be a multireg pass-by-value struct.
- if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
- {
- // We will use store instructions that each write a register sized value
-
- if (putArgChild->OperGet() == GT_FIELD_LIST)
- {
- // We consume all of the items in the GT_FIELD_LIST
- argNode->gtLsraInfo.srcCount = info->numSlots;
- }
- else
- {
- // We could use a ldp/stp sequence so we need two internal registers
- argNode->gtLsraInfo.internalIntCount = 2;
-
- if (putArgChild->OperGet() == GT_OBJ)
- {
- GenTreePtr objChild = putArgChild->gtOp.gtOp1;
- if (objChild->OperGet() == GT_LCL_VAR_ADDR)
- {
- // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
- // as one contained operation
- //
- MakeSrcContained(putArgChild, objChild);
- }
- }
-
- // We will generate all of the code for the GT_PUTARG_STK and its child node
- // as one contained operation
- //
- MakeSrcContained(argNode, putArgChild);
- }
- }
- else
- {
- // We must not have a multi-reg struct
- assert(info->numSlots == 1);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// LowerBlockStore: Set block store type
//
// Arguments:
// blkNode - The block store node of interest
@@ -1226,22 +100,17 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntr
// Return Value:
// None.
//
-// Notes:
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ Compiler* compiler = comp;
// Sources are dest address and initVal or source.
- // We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- GenTreePtr srcAddrOrFill = nullptr;
- bool isInitBlk = blkNode->OperIsInitBlkOp();
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
if (!isInitBlk)
{
@@ -1253,20 +122,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (source->gtOper == GT_IND)
{
srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
}
}
@@ -1303,41 +158,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
initVal->gtIntCon.gtIconVal = 0x0101010101010101LL * fill;
initVal->gtType = TYP_LONG;
}
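As a worked example of the multiplication above (assuming the fill value is a single unsigned byte): fill = 0x5A gives 0x0101010101010101 * 0x5A = 0x5A5A5A5A5A5A5A5A, i.e. the fill byte broadcast into every byte of the 64-bit initVal.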
-
- // In case we have a buffer >= 16 bytes
- // we can use SSE2 to do a 128-bit store in a single
- // instruction.
- if (size >= XMM_REGSIZE_BYTES)
- {
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- }
initBlkNode->gtBlkOpKind = GenTreeBlkOp::BlkOpKindUnroll;
- }
}
else
#endif // 0
{
- // The helper follows the regular ABI.
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
- initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
- blkNode->gtLsraInfo.internalIntCount = 1;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
- sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
- }
}
}
else
@@ -1373,18 +199,7 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
assert(objNode->HasGCPtr());
#endif
- // We don't need to materialize the struct size but we still need
- // a temporary register to perform the sequence of loads and stores.
- blkNode->gtLsraInfo.internalIntCount = 1;
-
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
- // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
- // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
- // which is killed by a StoreObj (and thus needn't be reserved).
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
- }
+ blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
{
@@ -1395,41 +210,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
#if 0
// In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
// we should unroll the loop to improve CQ.
+ // For reference see the code in lowerxarch.cpp.
// TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
if ((size != 0) && (size <= INITBLK_UNROLL_LIMIT))
{
- // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
- // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of
- // our framework assemblies, so this is the main code generation scheme we'll use.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- info->internalIntCount++;
- info->addInternalCandidates(l, l->allRegs(TYP_INT));
- }
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- // If we have a buffer larger than XMM_REGSIZE_BYTES,
- // reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- if (srcAddr != nullptr && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
@@ -1438,444 +224,10 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
- // If we have a constant integer size, this means we went beyond
- // CPBLK_UNROLL_LIMIT bytes; still, we should never have the case of
// any GC-Pointers in the src struct.
-
- dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
- // The srcAddr goes in arg1.
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
- }
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- internalIntCandidates |= RBM_ARG_2;
- internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
- }
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
}
- if (internalIntCount != 0)
- {
- blkNode->gtLsraInfo.internalIntCount = internalIntCount;
- blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
- }
- }
- }
-}
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
-//
-// Arguments:
-// tree - The GT_SIMD node of interest
-//
-// Return Value:
-// None.
-
-void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
-{
- NYI("TreeNodeInfoInitSIMD");
- GenTreeSIMD* simdTree = tree->AsSIMD();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* lsra = m_lsra;
- info->dstCount = 1;
- switch (simdTree->gtSIMDIntrinsicID)
- {
- case SIMDIntrinsicInit:
- {
- // This sets all fields of a SIMD struct to the given value.
- // Mark op1 as contained if it is either zero or an int constant of all 1's.
- info->srcCount = 1;
- GenTree* op1 = tree->gtOp.gtOp1;
- if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() &&
- op1->AsIntConCommon()->IconValue() == 0xffffffff) ||
- (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() &&
- op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL))
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- info->srcCount = 0;
- }
- }
- break;
-
- case SIMDIntrinsicInitN:
- info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
- // Need an internal register to stitch together all the values into a single vector in an XMM reg.
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicInitArray:
- // We have an array and an index, which may be contained.
- info->srcCount = 2;
- CheckImmedAndMakeContained(tree, tree->gtGetOp2());
- break;
-
- case SIMDIntrinsicDiv:
- // SSE2 has no instruction support for division on integer vectors
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicAbs:
- // This gets implemented as a bitwise-And operation with a mask
- // and hence we should never see it here.
- unreached();
- break;
-
- case SIMDIntrinsicSqrt:
- // SSE2 has no instruction support for sqrt on integer vectors.
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicAdd:
- case SIMDIntrinsicSub:
- case SIMDIntrinsicMul:
- case SIMDIntrinsicBitwiseAnd:
- case SIMDIntrinsicBitwiseAndNot:
- case SIMDIntrinsicBitwiseOr:
- case SIMDIntrinsicBitwiseXor:
- case SIMDIntrinsicMin:
- case SIMDIntrinsicMax:
- info->srcCount = 2;
-
- // SSE2 32-bit integer multiplication requires two temp regs
- if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
- {
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicEqual:
- info->srcCount = 2;
- break;
-
- // SSE2 doesn't support < and <= directly on int vectors.
- // Instead we need to use > and >= with swapped operands.
- case SIMDIntrinsicLessThan:
- case SIMDIntrinsicLessThanOrEqual:
- info->srcCount = 2;
- noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
- break;
-
- // SIMDIntrinsicGreaterThan is supported only on non-floating point base type vectors.
- // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
- // Instead we need to use < and <= with swapped operands.
- case SIMDIntrinsicGreaterThan:
- noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicGreaterThanOrEqual:
- noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
-
- // a >= b = (a==b) | (a>b)
- // To hold intermediate result of a==b and a>b we need two distinct
- // registers. We can use targetReg and one internal reg provided
- // they are distinct which is not guaranteed. Therefore, we request
- // two internal registers so that one of the internal registers has
- // to be different from targetReg.
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
- // Need two SIMD registers as scratch.
- // See genSIMDIntrinsicRelOp() for details on the code sequence generated and
- // the need for two scratch registers.
- info->srcCount = 2;
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicDotProduct:
- // Also need an internal register as scratch. Further, targetReg and the internal reg
- // must be two distinct regs. This is achieved by requesting two internal registers; one of them
- // has to be different from targetReg.
- //
- // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
- // the need for scratch registers.
- info->srcCount = 2;
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- break;
-
- case SIMDIntrinsicGetItem:
- // This implements get_Item method. The sources are:
- // - the source SIMD struct
- // - index (which element to get)
- // The result is baseType of SIMD struct.
- info->srcCount = 2;
-
- op2 = tree->gtGetOp2();
- // If the index is a constant, mark it as contained.
- if (CheckImmedAndMakeContained(tree, op2))
- {
- info->srcCount = 1;
- }
-
- // If the index is not a constant, we will use the SIMD temp location to store the vector.
- // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
- // can use that in the process of extracting the element.
- // In all other cases with constant index, we need a temp xmm register to extract the
- // element if index is other than zero.
- if (!op2->IsCnsIntOrI())
- {
- (void)comp->getSIMDInitTempVarNum();
- }
- else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0))
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicCast:
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicGetX:
- case SIMDIntrinsicGetY:
- case SIMDIntrinsicGetZ:
- case SIMDIntrinsicGetW:
- case SIMDIntrinsicGetOne:
- case SIMDIntrinsicGetZero:
- case SIMDIntrinsicGetLength:
- case SIMDIntrinsicGetAllOnes:
- assert(!"Get intrinsics should not be seen during Lowering.");
- unreached();
-
- default:
- noway_assert(!"Unimplemented SIMD node type.");
- unreached();
- }
-}
-#endif // FEATURE_SIMD
-
-void Lowering::LowerGCWriteBarrier(GenTree* tree)
-{
- GenTreePtr dst = tree;
- GenTreePtr addr = tree->gtOp.gtOp1;
- GenTreePtr src = tree->gtOp.gtOp2;
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- short leaSrcCount = 0;
- if (lea->Base() != nullptr)
- {
- leaSrcCount++;
- }
- if (lea->Index() != nullptr)
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
-
-#if NOGC_WRITE_BARRIERS
- // For the NOGC JIT Helper calls
- //
- // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
- // the 'src' goes into x15 (REG_WRITE_BARRIER)
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
-#else
- // For the standard JIT Helper calls
- // op1 goes into REG_ARG_0 and
- // op2 goes into REG_ARG_1
- //
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
-#endif // NOGC_WRITE_BARRIERS
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
-}
-
-//-----------------------------------------------------------------------------------------
-// Specify register requirements for address expression of an indirection operation.
-//
-// Arguments:
-// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
-//
-void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
-{
- assert(indirTree->OperIsIndir());
- // If this is the rhs of a block copy (i.e. non-enregisterable struct),
- // it has no register requirements.
- if (indirTree->TypeGet() == TYP_STRUCT)
- {
- return;
- }
-
- GenTreePtr addr = indirTree->gtGetOp1();
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned cns = 0;
- unsigned mul;
- bool rev;
- bool modifiedSources = false;
-
- if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
- {
- GenTreeAddrMode* lea = addr->AsAddrMode();
- base = lea->Base();
- index = lea->Index();
- cns = lea->gtOffset;
-
- m_lsra->clearOperandCounts(addr);
- // The srcCount is decremented because addr is now "contained",
- // then we account for the base and index below, if they are non-null.
- info->srcCount--;
- }
- else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
- {
- // An addressing mode will be constructed that may cause some
- // nodes to not need a register, and cause others' lifetimes to be extended
- // to the GT_IND or even its parent if it's an assignment
-
- assert(base != addr);
- m_lsra->clearOperandCounts(addr);
-
- GenTreePtr arrLength = nullptr;
-
- // Traverse the computation below GT_IND to find the operands
- // for the addressing mode, marking the various constants and
- // intermediate results as not consuming/producing.
- // If the traversal were more complex, we might consider using
- // a traversal function, but the addressing mode is only made
- // up of simple arithmetic operators, and the code generator
- // only traverses one leg of each node.
-
- bool foundBase = (base == nullptr);
- bool foundIndex = (index == nullptr);
- GenTreePtr nextChild = nullptr;
- for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
- {
- nextChild = nullptr;
- GenTreePtr op1 = child->gtOp.gtOp1;
- GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
-
- if (op1 == base)
- {
- foundBase = true;
- }
- else if (op1 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op1);
- if (!op1->OperIsLeaf())
- {
- nextChild = op1;
- }
- }
-
- if (op2 != nullptr)
- {
- if (op2 == base)
- {
- foundBase = true;
- }
- else if (op2 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op2);
- if (!op2->OperIsLeaf())
- {
- assert(nextChild == nullptr);
- nextChild = op2;
- }
- }
- }
}
- assert(foundBase && foundIndex);
- info->srcCount--; // it gets incremented below.
- }
- else if (addr->gtOper == GT_ARR_ELEM)
- {
- // The GT_ARR_ELEM consumes all the indices and produces the offset.
- // The array object lives until the mem access.
- // We also consume the target register to which the address is
- // computed
-
- info->srcCount++;
- assert(addr->gtLsraInfo.srcCount >= 2);
- addr->gtLsraInfo.srcCount -= 1;
}
- else
- {
- // it is nothing but a plain indir
- info->srcCount--; // base gets added in below
- base = addr;
- }
-
- if (base != nullptr)
- {
- info->srcCount++;
- }
-
- if (index != nullptr && !modifiedSources)
- {
- info->srcCount++;
- }
-
- // On ARM64 we may need a single internal register
- // (even when both conditions below are true, we still only need a single internal register)
- if ((index != nullptr) && (cns != 0))
- {
- // ARM64 does not support both Index and offset so we need an internal register
- info->internalIntCount = 1;
- }
- else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
- {
- // This offset can't be contained in the ldr/str instruction, so we need an internal register
- info->internalIntCount = 1;
- }
-}
-
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
}
/* Lower GT_CAST(srcType, DstType) nodes.
diff --git a/src/jit/lowerxarch.cpp b/src/jit/lowerxarch.cpp
index 589cef482e..f89a3dfc7b 100644
--- a/src/jit/lowerxarch.cpp
+++ b/src/jit/lowerxarch.cpp
@@ -42,61 +42,11 @@ void Lowering::LowerRotate(GenTreePtr tree)
//
// Notes:
// This involves:
-// - Setting the appropriate candidates for a store of a multi-reg call return value.
-// - Requesting an internal register for SIMD12 stores.
-// - Handling of contained immediates and widening operations of unsigneds.
+// - Widening operations of unsigneds.
void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
{
- TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
-
- // Is this the case of var = call where call is returning
- // a value in multiple return registers?
GenTree* op1 = storeLoc->gtGetOp1();
- if (op1->IsMultiRegCall())
- {
- // backend expects to see this case only for store lclvar.
- assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
-
- // srcCount = number of registers in which the value is returned by call
- GenTreeCall* call = op1->AsCall();
- ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
-
- // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
- regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
- op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
- return;
- }
-
-#ifdef FEATURE_SIMD
- if (varTypeIsSIMD(storeLoc))
- {
- if (op1->IsCnsIntOrI())
- {
- // InitBlk
- MakeSrcContained(storeLoc, op1);
- }
- else if ((storeLoc->TypeGet() == TYP_SIMD12) && (storeLoc->OperGet() == GT_STORE_LCL_FLD))
- {
- // Need an additional register to extract upper 4 bytes of Vector3.
- info->internalFloatCount = 1;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
-
- // In this case don't mark the operand as contained as we want it to
- // be evaluated into an xmm register
- }
- return;
- }
-#endif // FEATURE_SIMD
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(storeLoc)))
- {
- MakeSrcContained(storeLoc, op1);
- }
// Try to widen the ops if they are going into a local var.
if ((storeLoc->gtOper == GT_STORE_LCL_VAR) && (storeLoc->gtOp1->gtOper == GT_CNS_INT))
@@ -148,1490 +98,8 @@ void Lowering::LowerStoreLoc(GenTreeLclVarCommon* storeLoc)
}
}
-/**
- * Takes care of annotating the register requirements
- * for every TreeNodeInfo struct that maps to each tree node.
- * Preconditions:
- * LSRA has been initialized and there is a TreeNodeInfo node
- * already allocated and initialized for every tree in the IR.
- * Postconditions:
- * Every TreeNodeInfo instance has the right annotations on register
- * requirements needed by LSRA to build the Interval Table (source,
- * destination and internal [temp] register counts).
- * This code was originally refactored out of LSRA.
- */
-void Lowering::TreeNodeInfoInit(GenTree* tree)
-{
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- switch (tree->OperGet())
- {
- GenTree* op1;
- GenTree* op2;
-
- default:
- TreeNodeInfoInitSimple(tree);
- break;
-
- case GT_LCL_FLD:
- case GT_LCL_VAR:
- info->srcCount = 0;
- info->dstCount = 1;
-
-#ifdef FEATURE_SIMD
- // Need an additional register to read upper 4 bytes of Vector3.
- if (tree->TypeGet() == TYP_SIMD12)
- {
- // We need an internal register different from targetReg in which 'tree' produces its result
- // because both targetReg and internal reg will be in use at the same time.
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
- }
-#endif
- break;
-
- case GT_STORE_LCL_FLD:
- case GT_STORE_LCL_VAR:
-#ifdef _TARGET_X86_
- if (tree->gtGetOp1()->OperGet() == GT_LONG)
- {
- info->srcCount = 2;
- }
- else
-#endif // _TARGET_X86_
- {
- info->srcCount = 1;
- }
- info->dstCount = 0;
- LowerStoreLoc(tree->AsLclVarCommon());
- break;
-
- case GT_BOX:
- noway_assert(!"box should not exist here");
- // The result of 'op1' is also the final result
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_PHYSREGDST:
- info->srcCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_COMMA:
- {
- GenTreePtr firstOperand;
- GenTreePtr secondOperand;
- if (tree->gtFlags & GTF_REVERSE_OPS)
- {
- firstOperand = tree->gtOp.gtOp2;
- secondOperand = tree->gtOp.gtOp1;
- }
- else
- {
- firstOperand = tree->gtOp.gtOp1;
- secondOperand = tree->gtOp.gtOp2;
- }
- if (firstOperand->TypeGet() != TYP_VOID)
- {
- firstOperand->gtLsraInfo.isLocalDefUse = true;
- firstOperand->gtLsraInfo.dstCount = 0;
- }
- if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
- {
- secondOperand->gtLsraInfo.isLocalDefUse = true;
- secondOperand->gtLsraInfo.dstCount = 0;
- }
- }
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LIST:
- case GT_FIELD_LIST:
- case GT_ARGPLACE:
- case GT_NO_OP:
- case GT_START_NONGC:
- case GT_PROF_HOOK:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_CNS_DBL:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
-#if !defined(_TARGET_64BIT_)
-
- case GT_LONG:
- if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
- {
- // An unused GT_LONG node needs to consume its sources.
- info->srcCount = 2;
- }
- else
- {
- // Passthrough
- info->srcCount = 0;
- }
-
- info->dstCount = 0;
- break;
-
-#endif // !defined(_TARGET_64BIT_)
-
- case GT_QMARK:
- case GT_COLON:
- info->srcCount = 0;
- info->dstCount = 0;
- unreached();
- break;
-
- case GT_RETURN:
- TreeNodeInfoInitReturn(tree);
- break;
-
- case GT_RETFILT:
- if (tree->TypeGet() == TYP_VOID)
- {
- info->srcCount = 0;
- info->dstCount = 0;
- }
- else
- {
- assert(tree->TypeGet() == TYP_INT);
-
- info->srcCount = 1;
- info->dstCount = 0;
-
- info->setSrcCandidates(l, RBM_INTRET);
- tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
- }
- break;
-
- // A GT_NOP is a passthrough if it is void or has a child, but it must
- // be considered to produce a dummy value if it has a type but no child.
- case GT_NOP:
- info->srcCount = 0;
- if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
- {
- info->dstCount = 1;
- }
- else
- {
- info->dstCount = 0;
- }
- break;
-
- case GT_JTRUE:
- {
- info->srcCount = 0;
- info->dstCount = 0;
-
- GenTree* cmp = tree->gtGetOp1();
- l->clearDstCount(cmp);
-
-#ifdef FEATURE_SIMD
- // Say we have the following IR
- // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
- // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
- // GT_JTRUE(integerCompareResult)
- //
- // In this case we don't need to generate code for GT_EQ/GT_NE, since SIMD (In)Equality
- // intrinsic would set or clear Zero flag.
-
- genTreeOps cmpOper = cmp->OperGet();
- if (cmpOper == GT_EQ || cmpOper == GT_NE)
- {
- GenTree* cmpOp1 = cmp->gtGetOp1();
- GenTree* cmpOp2 = cmp->gtGetOp2();
-
- if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
- {
- // clear dstCount on SIMD node to indicate that
- // result doesn't need to be materialized into a register.
- l->clearOperandCounts(cmp);
- l->clearDstCount(cmpOp1);
- l->clearOperandCounts(cmpOp2);
-
- // Codegen of SIMD (in)Equality uses target integer reg
- // only for setting flags. Target reg is not needed on AVX
- // when comparing against Vector Zero. In all other cases
- // we need to reserve an int type internal register, since we
- // have cleared dstCount.
- if (compiler->canUseAVX() && cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
- {
- // We don't need an internal register, since we use vptest
- // for setting flags.
- }
- else
- {
- ++(cmpOp1->gtLsraInfo.internalIntCount);
- regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
- internalCandidates |= l->allRegs(TYP_INT);
- cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
- }
-
- // We would have to reverse compare oper in the following cases:
- // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
- // Therefore, if compare oper is == or != against false(0), we will
- // be checking opposite of what is required.
- //
- // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
- // Therefore, if compare oper is == or != against true(1), we will
- // be checking opposite of what is required.
- GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
- if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
- {
- if (cmpOp2->IsIntegralConst(0))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- else
- {
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
- if (cmpOp2->IsIntegralConst(1))
- {
- cmp->SetOper(GenTree::ReverseRelop(cmpOper));
- }
- }
- }
- }
-#endif // FEATURE_SIMD
- }
- break;
-
- case GT_JCC:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_JMP:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_SWITCH:
- // This should never occur since switch nodes must not be visible at this
- // point in the JIT.
- info->srcCount = 0;
- info->dstCount = 0; // To avoid getting uninit errors.
- noway_assert(!"Switch must be lowered at this point");
- break;
-
- case GT_JMPTABLE:
- info->srcCount = 0;
- info->dstCount = 1;
- break;
-
- case GT_SWITCH_TABLE:
- info->srcCount = 2;
- info->internalIntCount = 1;
- info->dstCount = 0;
- break;
-
- case GT_ASG:
- case GT_ASG_ADD:
- case GT_ASG_SUB:
- noway_assert(!"We should never hit any assignment operator in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
-#if !defined(_TARGET_64BIT_)
- case GT_ADD_LO:
- case GT_ADD_HI:
- case GT_SUB_LO:
- case GT_SUB_HI:
-#endif
- case GT_ADD:
- case GT_SUB:
- // SSE2 arithmetic instructions don't support the form "op mem, xmm".
- // Rather they only support "op xmm, mem/xmm" form.
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // overflow operations aren't supported on float/double types.
- assert(!tree->gtOverflow());
-
- op1 = tree->gtGetOp1();
- op2 = tree->gtGetOp2();
-
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else if (tree->OperIsCommutative() &&
- (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))))
- {
- // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
- // as long as it is safe so that the following efficient code sequence is generated:
- // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
- // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
- //
- // Instead of
- // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
- // movss op1Reg, [memOp]; movaps targetReg, op1Reg, addss/sd targetReg, Op2Reg
- MakeSrcContained(tree, op1);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
- }
- break;
- }
-
- __fallthrough;
-
- case GT_AND:
- case GT_OR:
- case GT_XOR:
- TreeNodeInfoInitLogicalOp(tree);
- break;
-
- case GT_RETURNTRAP:
- // this just turns into a compare of its child with an int
- // + a conditional call
- info->srcCount = 1;
- info->dstCount = 0;
- if (tree->gtOp.gtOp1->isIndir())
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- }
- info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
- break;
-
- case GT_MOD:
- case GT_DIV:
- case GT_UMOD:
- case GT_UDIV:
- TreeNodeInfoInitModDiv(tree);
- break;
-
- case GT_MUL:
- case GT_MULHI:
-#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
- case GT_MUL_LONG:
-#endif
- SetMulOpCounts(tree);
- break;
-
- case GT_INTRINSIC:
- TreeNodeInfoInitIntrinsic(tree);
- break;
-
-#ifdef FEATURE_SIMD
- case GT_SIMD:
- TreeNodeInfoInitSIMD(tree);
- break;
-#endif // FEATURE_SIMD
-
- case GT_CAST:
- TreeNodeInfoInitCast(tree);
- break;
-
- case GT_NEG:
- info->srcCount = 1;
- info->dstCount = 1;
-
- // TODO-XArch-CQ:
- // SSE instruction set doesn't have an instruction to negate a number.
- // The recommended way is to xor the float/double number with a bitmask.
- // The only way to xor is using xorps or xorpd both of which operate on
- // 128-bit operands. To hold the bit-mask we would need another xmm
- // register or a 16-byte aligned 128-bit data constant. Right now emitter
- // lacks the support for emitting such constants or instruction with mem
- // addressing mode referring to a 128-bit operand. For now we use an
- // internal xmm register to load 32/64-bit bitmask from data section.
- // Note that by trading additional data section memory (128-bit) we can
- // save on the need for an internal register and also a memory-to-reg
- // move.
- //
- // Note: another option to avoid the internal register requirement is to
- // lower it as GT_SUB(0, src). This will generate code different from
- // Jit64 and could possibly result in compat issues (?).
- if (varTypeIsFloating(tree))
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
- }
- else
- {
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
- }
- break;
-
- case GT_NOT:
- info->srcCount = 1;
- info->dstCount = 1;
- break;
-
- case GT_LSH:
- case GT_RSH:
- case GT_RSZ:
- case GT_ROL:
- case GT_ROR:
-#ifdef _TARGET_X86_
- case GT_LSH_HI:
- case GT_RSH_LO:
-#endif
- TreeNodeInfoInitShiftRotate(tree);
- break;
-
- case GT_EQ:
- case GT_NE:
- case GT_LT:
- case GT_LE:
- case GT_GE:
- case GT_GT:
- TreeNodeInfoInitCmp(tree);
- break;
-
- case GT_CKFINITE:
- info->srcCount = 1;
- info->dstCount = 1;
- info->internalIntCount = 1;
- break;
-
- case GT_CMPXCHG:
- info->srcCount = 3;
- info->dstCount = 1;
-
- // comparand is preferenced to RAX.
- // Remaining two operands can be in any reg other than RAX.
- tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
- tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
- break;
-
- case GT_LOCKADD:
- info->srcCount = 2;
- info->dstCount = 0;
-
- CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
- break;
-
- case GT_CALL:
- TreeNodeInfoInitCall(tree->AsCall());
- break;
-
- case GT_ADDR:
- {
- // For a GT_ADDR, the child node should not be evaluated into a register
- GenTreePtr child = tree->gtOp.gtOp1;
- assert(!l->isCandidateLocalRef(child));
- l->clearDstCount(child);
- info->srcCount = 0;
- info->dstCount = 1;
- }
- break;
-
-#if !defined(FEATURE_PUT_STRUCT_ARG_STK)
- case GT_OBJ:
-#endif
- case GT_BLK:
- case GT_DYN_BLK:
- // These should all be eliminated prior to Lowering.
- assert(!"Non-store block node in Lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- case GT_PUTARG_STK:
- TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
- break;
-#endif // FEATURE_PUT_STRUCT_ARG_STK
-
- case GT_STORE_BLK:
- case GT_STORE_OBJ:
- case GT_STORE_DYN_BLK:
- TreeNodeInfoInitBlockStore(tree->AsBlk());
- break;
-
- case GT_INIT_VAL:
- // Always a passthrough of its child's value.
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_LCLHEAP:
- TreeNodeInfoInitLclHeap(tree);
- break;
-
- case GT_ARR_BOUNDS_CHECK:
-#ifdef FEATURE_SIMD
- case GT_SIMD_CHK:
-#endif // FEATURE_SIMD
- {
- GenTreeBoundsChk* node = tree->AsBoundsChk();
- // Consumes arrLen & index - has no result
- info->srcCount = 2;
- info->dstCount = 0;
-
- GenTreePtr other;
- if (CheckImmedAndMakeContained(tree, node->gtIndex))
- {
- other = node->gtArrLen;
- }
- else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
- {
- other = node->gtIndex;
- }
- else if (node->gtIndex->isMemoryOp())
- {
- other = node->gtIndex;
- }
- else
- {
- other = node->gtArrLen;
- }
-
- if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
- {
- if (other->isMemoryOp())
- {
- MakeSrcContained(tree, other);
- }
- else
- {
- // We can mark 'other' as reg optional, since it is not contained.
- SetRegOptional(other);
- }
- }
- }
- break;
-
- case GT_ARR_ELEM:
- // These must have been lowered to GT_ARR_INDEX
- noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-
- case GT_ARR_INDEX:
- info->srcCount = 2;
- info->dstCount = 1;
- // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
- // times while the result is being computed.
- tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
- info->hasDelayFreeSrc = true;
- break;
-
- case GT_ARR_OFFSET:
- // This consumes the offset, if any, the arrObj and the effective index,
- // and produces the flattened offset for this dimension.
- info->srcCount = 3;
- info->dstCount = 1;
-
- // we don't want to generate code for this
- if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
- {
- MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
- }
- else
- {
- // Here we simply need an internal register, which must be different
- // from any of the operand's registers, but may be the same as targetReg.
- info->internalIntCount = 1;
- }
- break;
-
- case GT_LEA:
- // The LEA usually passes its operands through to the GT_IND, in which case we'll
- // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
- // so we set them here.
- info->srcCount = 0;
- if (tree->AsAddrMode()->HasBase())
- {
- info->srcCount++;
- }
- if (tree->AsAddrMode()->HasIndex())
- {
- info->srcCount++;
- }
- info->dstCount = 1;
- break;
-
- case GT_STOREIND:
- {
- info->srcCount = 2;
- info->dstCount = 0;
- GenTree* src = tree->gtOp.gtOp2;
-
- if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
- {
- LowerGCWriteBarrier(tree);
- break;
- }
-
- // If the source is a containable immediate, make it contained, unless it is
- // an int-size or larger store of zero to memory, because we can generate smaller code
- // by zeroing a register and then storing it.
- if (IsContainableImmed(tree, src) &&
- (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
- {
- MakeSrcContained(tree, src);
- }
- else if (!varTypeIsFloating(tree))
- {
- // Perform recognition of trees with the following structure:
- // StoreInd(addr, BinOp(expr, GT_IND(addr)))
- // to be able to fold this into an instruction of the form
- // BINOP [addr], register
- // where register is the actual place where 'expr' is computed.
- //
- // SSE2 doesn't support RMW form of instructions.
- if (SetStoreIndOpCountsIfRMWMemOp(tree))
- {
- break;
- }
- }
-
- SetIndirAddrOpCounts(tree);
- }
- break;
-
- case GT_NULLCHECK:
- info->dstCount = 0;
- info->srcCount = 1;
- info->isLocalDefUse = true;
- break;
-
- case GT_IND:
- info->dstCount = 1;
- info->srcCount = 1;
- SetIndirAddrOpCounts(tree);
- break;
-
- case GT_CATCH_ARG:
- info->srcCount = 0;
- info->dstCount = 1;
- info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
- break;
-
-#if !FEATURE_EH_FUNCLETS
- case GT_END_LFIN:
- info->srcCount = 0;
- info->dstCount = 0;
- break;
-#endif
-
- case GT_CLS_VAR:
- // These nodes are eliminated by rationalizer.
- JITDUMP("Unexpected node %s in Lower.\n", GenTree::NodeName(tree->OperGet()));
- unreached();
- break;
- } // end switch (tree->OperGet())
-
- // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
- // Even then we would like to set isTgtPref on Op1.
- if (tree->OperIsBinary() && info->srcCount >= 1)
- {
- if (isRMWRegOper(tree))
- {
- GenTree* op1 = tree->gtOp.gtOp1;
- GenTree* op2 = tree->gtOp.gtOp2;
-
- // Commutative opers like add/mul/and/or/xor could reverse the order of
- // operands if it is safe to do so. In such a case we would like op2 to be
- // target preferenced instead of op1.
- if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
- {
- op1 = op2;
- op2 = tree->gtOp.gtOp1;
- }
-
- // If we have a read-modify-write operation, we want to preference op1 to the target.
- // If op1 is contained, we don't want to preference it, but it won't
- // show up as a source in that case, so it will be ignored.
- op1->gtLsraInfo.isTgtPref = true;
-
- // Is this a non-commutative operator, or is op2 a contained memory op?
- // (Note that we can't call IsContained() at this point because it uses exactly the
- // same information we're currently computing.)
- // In either case, we need to make op2 remain live until the op is complete, by marking
- // the source(s) associated with op2 as "delayFree".
- // Note that if op2 of a binary RMW operator is a memory op, even if the operator
- // is commutative, codegen cannot reverse them.
- // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
- // more work to be done to correctly reverse the operands if they involve memory
- // operands. Also, we may need to handle more cases than GT_IND, especially once
- // we've modified the register allocator to not require all nodes to be assigned
- // a register (e.g. a spilled lclVar can often be referenced directly from memory).
- // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
-
- GenTree* delayUseSrc = nullptr;
- // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
- // to special case them.
- if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
- {
- delayUseSrc = op1;
- }
- else if ((op2 != nullptr) &&
- (!tree->OperIsCommutative() || (op2->isMemoryOp() && (op2->gtLsraInfo.srcCount == 0))))
- {
- delayUseSrc = op2;
- }
- if (delayUseSrc != nullptr)
- {
-                // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set "delayFree"
- // on the base & index, if any.
- // Otherwise, we set it on delayUseSrc itself.
- if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
- {
- GenTree* base = delayUseSrc->AsIndir()->Base();
- GenTree* index = delayUseSrc->AsIndir()->Index();
- if (base != nullptr)
- {
- base->gtLsraInfo.isDelayFree = true;
- }
- if (index != nullptr)
- {
- index->gtLsraInfo.isDelayFree = true;
- }
- }
- else
- {
- delayUseSrc->gtLsraInfo.isDelayFree = true;
- }
- info->hasDelayFreeSrc = true;
- }
- }
- }
-
- TreeNodeInfoInitCheckByteable(tree);
-
- // We need to be sure that we've set info->srcCount and info->dstCount appropriately
- assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are
-// required, and set the tree node info accordingly.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
-{
-#ifdef _TARGET_X86_
- LinearScan* l = m_lsra;
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
- // if the tree node is a byte type.
- //
-    // Though this looks conservative in theory, in practice we have not found a case where
-    // the logic below leads to an overly conservative register specification. If we find
-    // such a case in the future, this logic will need to be fine-tuned for it.
-
- if (ExcludeNonByteableRegisters(tree))
- {
- regMaskTP regMask;
- if (info->dstCount > 0)
- {
- regMask = info->getDstCandidates(l);
- assert(regMask != RBM_NONE);
- info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
-
- if (tree->OperIsSimple() && (info->srcCount > 0))
- {
- // No need to set src candidates on a contained child operand.
- GenTree* op = tree->gtOp.gtOp1;
- assert(op != nullptr);
- bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
- {
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
-
- if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
- {
- op = tree->gtOp.gtOp2;
- containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
- if (!containedNode)
- {
- regMask = op->gtLsraInfo.getSrcCandidates(l);
- assert(regMask != RBM_NONE);
- op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
- }
- }
-#endif //_TARGET_X86_
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
-// without special handling based on the tree node type.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- unsigned kind = tree->OperKind();
- info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- if (kind & (GTK_CONST | GTK_LEAF))
- {
- info->srcCount = 0;
- }
- else if (kind & (GTK_SMPOP))
- {
- if (tree->gtGetOp2() != nullptr)
- {
- info->srcCount = 2;
- }
- else
- {
- info->srcCount = 1;
- }
- }
- else
- {
- unreached();
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
-#if !defined(_TARGET_64BIT_)
- if (tree->TypeGet() == TYP_LONG)
- {
- GenTree* op1 = tree->gtGetOp1();
- noway_assert(op1->OperGet() == GT_LONG);
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
- info->srcCount = 2;
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
- info->dstCount = 0;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- GenTree* op1 = tree->gtGetOp1();
- regMaskTP useCandidates = RBM_NONE;
-
- info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
- info->dstCount = 0;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (varTypeIsStruct(tree))
- {
- // op1 has to be either an lclvar or a multi-reg returning call
- if (op1->OperGet() == GT_LCL_VAR)
- {
- GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
- LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
- assert(varDsc->lvIsMultiRegRet);
-
- // Mark var as contained if not enregistrable.
- if (!varTypeIsEnregisterableStruct(op1))
- {
- MakeSrcContained(tree, op1);
- }
- }
- else
- {
- noway_assert(op1->IsMultiRegCall());
-
- ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
- info->srcCount = retTypeDesc->GetReturnRegCount();
- useCandidates = retTypeDesc->GetABIReturnRegs();
- }
- }
- else
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- {
- // Non-struct type return - determine useCandidates
- switch (tree->TypeGet())
- {
- case TYP_VOID:
- useCandidates = RBM_NONE;
- break;
- case TYP_FLOAT:
- useCandidates = RBM_FLOATRET;
- break;
- case TYP_DOUBLE:
- useCandidates = RBM_DOUBLERET;
- break;
-#if defined(_TARGET_64BIT_)
- case TYP_LONG:
- useCandidates = RBM_LNGRET;
- break;
-#endif // defined(_TARGET_64BIT_)
- default:
- useCandidates = RBM_INTRET;
- break;
- }
- }
-
- if (useCandidates != RBM_NONE)
- {
- op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
- }
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- info->srcCount = 2;
- info->dstCount = 1;
-
-    // For shift operations, the shift amount must be placed
-    // in CL when the number of bits to shift is not a constant.
- GenTreePtr shiftBy = tree->gtOp.gtOp2;
- GenTreePtr source = tree->gtOp.gtOp1;
-
-#ifdef _TARGET_X86_
- // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
- // we can have a three operand form. Increment the srcCount.
- if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
- {
- assert(source->OperGet() == GT_LONG);
-
- info->srcCount++;
-
- if (tree->OperGet() == GT_LSH_HI)
- {
- GenTreePtr sourceLo = source->gtOp.gtOp1;
- sourceLo->gtLsraInfo.isDelayFree = true;
- }
- else
- {
- GenTreePtr sourceHi = source->gtOp.gtOp2;
- sourceHi->gtLsraInfo.isDelayFree = true;
- }
-
- source->gtLsraInfo.hasDelayFreeSrc = true;
- info->hasDelayFreeSrc = true;
- }
-#endif
-
-    // x64 can encode 8 bits of shift, but only the low 5 or 6 bits are used (the others are masked off).
-    // We will allow whatever can be encoded - hope you know what you are doing.
- if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
- (shiftBy->gtIntConCommon.IconValue() < 0))
- {
- source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
- info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
- }
- else
- {
- MakeSrcContained(tree, shiftBy);
-
- // Note that Rotate Left/Right instructions don't set ZF and SF flags.
- //
-        // If the operand being shifted is 32 bits, the upper three bits of the shift count are
-        // masked off by hardware to get the actual shift count. Similarly, for 64-bit operands
-        // the shift count is narrowed to [0..63]. If the resulting shift count is zero,
-        // then the shift operation won't modify flags.
- //
- // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
- // if the shift count is known to be non-zero and in the range depending on the
- // operand size.
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCall: Set the NodeInfo for a call.
-//
-// Arguments:
-// call - The call node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
-{
- TreeNodeInfo* info = &(call->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
- bool hasMultiRegRetVal = false;
- ReturnTypeDesc* retTypeDesc = nullptr;
-
- info->srcCount = 0;
- if (call->TypeGet() != TYP_VOID)
- {
- hasMultiRegRetVal = call->HasMultiRegRetVal();
- if (hasMultiRegRetVal)
- {
- // dst count = number of registers in which the value is returned by call
- retTypeDesc = call->GetReturnTypeDesc();
- info->dstCount = retTypeDesc->GetReturnRegCount();
- }
- else
- {
- info->dstCount = 1;
- }
- }
- else
- {
- info->dstCount = 0;
- }
-
- GenTree* ctrlExpr = call->gtControlExpr;
- if (call->gtCallType == CT_INDIRECT)
- {
- // either gtControlExpr != null or gtCallAddr != null.
- // Both cannot be non-null at the same time.
- assert(ctrlExpr == nullptr);
- assert(call->gtCallAddr != nullptr);
- ctrlExpr = call->gtCallAddr;
-
-#ifdef _TARGET_X86_
- // Fast tail calls aren't currently supported on x86, but if they ever are, the code
- // below that handles indirect VSD calls will need to be fixed.
- assert(!call->IsFastTailCall() || !call->IsVirtualStub());
-#endif // _TARGET_X86_
- }
-
- // set reg requirements on call target represented as control sequence.
- if (ctrlExpr != nullptr)
- {
- // we should never see a gtControlExpr whose type is void.
- assert(ctrlExpr->TypeGet() != TYP_VOID);
-
-        // a call can take an Rm op on x64
- info->srcCount++;
-
- // In case of fast tail implemented as jmp, make sure that gtControlExpr is
- // computed into a register.
- if (!call->IsFastTailCall())
- {
-#ifdef _TARGET_X86_
- // On x86, we need to generate a very specific pattern for indirect VSD calls:
- //
- // 3-byte nop
- // call dword ptr [eax]
- //
- // Where EAX is also used as an argument to the stub dispatch helper. Make
- // sure that the call target address is computed into EAX in this case.
- if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
- {
- assert(ctrlExpr->isIndir());
-
- ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
- MakeSrcContained(call, ctrlExpr);
- }
- else
-#endif // _TARGET_X86_
- if (ctrlExpr->isIndir())
- {
- MakeSrcContained(call, ctrlExpr);
- }
- }
- else
- {
- // Fast tail call - make sure that call target is always computed in RAX
- // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
- }
-
- // If this is a varargs call, we will clear the internal candidates in case we need
- // to reserve some integer registers for copying float args.
- // We have to do this because otherwise the default candidates are allRegs, and adding
- // the individual specific registers will have no effect.
- if (call->IsVarargs())
- {
- info->setInternalCandidates(l, RBM_NONE);
- }
-
- RegisterType registerType = call->TypeGet();
-
- // Set destination candidates for return value of the call.
- CLANG_FORMAT_COMMENT_ANCHOR;
-
-#ifdef _TARGET_X86_
- if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
- {
- // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
- // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
- // correct argument registers.
- info->setDstCandidates(l, RBM_PINVOKE_TCB);
- }
- else
-#endif // _TARGET_X86_
- if (hasMultiRegRetVal)
- {
- assert(retTypeDesc != nullptr);
- info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
- }
- else if (varTypeIsFloating(registerType))
- {
-#ifdef _TARGET_X86_
- // The return value will be on the X87 stack, and we will need to move it.
- info->setDstCandidates(l, l->allRegs(registerType));
-#else // !_TARGET_X86_
- info->setDstCandidates(l, RBM_FLOATRET);
-#endif // !_TARGET_X86_
- }
- else if (registerType == TYP_LONG)
- {
- info->setDstCandidates(l, RBM_LNGRET);
- }
- else
- {
- info->setDstCandidates(l, RBM_INTRET);
- }
-
- // number of args to a call =
- // callRegArgs + (callargs - placeholders, setup, etc)
- // there is an explicit thisPtr but it is redundant
-
- // If there is an explicit this pointer, we don't want that node to produce anything
- // as it is redundant
- if (call->gtCallObjp != nullptr)
- {
- GenTreePtr thisPtrNode = call->gtCallObjp;
-
- if (thisPtrNode->gtOper == GT_PUTARG_REG)
- {
- l->clearOperandCounts(thisPtrNode);
- l->clearDstCount(thisPtrNode->gtOp.gtOp1);
- }
- else
- {
- l->clearDstCount(thisPtrNode);
- }
- }
-
-#if FEATURE_VARARG
- bool callHasFloatRegArgs = false;
-#endif // FEATURE_VARARG
-
- // First, count reg args
- for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
- {
- assert(list->OperIsList());
-
- GenTreePtr argNode = list->Current();
-
- fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
- assert(curArgTabEntry);
-
- if (curArgTabEntry->regNum == REG_STK)
- {
- // late arg that is not passed in a register
- DISPNODE(argNode);
- assert(argNode->gtOper == GT_PUTARG_STK);
- argNode->gtLsraInfo.srcCount = 1;
- argNode->gtLsraInfo.dstCount = 0;
-
-#ifdef FEATURE_PUT_STRUCT_ARG_STK
- // If the node is TYP_STRUCT and it is put on stack with
- // putarg_stk operation, we consume and produce no registers.
-            // In this case the embedded Obj node should not produce
-            // registers either, since it is contained.
- // Note that if it is a SIMD type the argument will be in a register.
- if (argNode->TypeGet() == TYP_STRUCT)
- {
- assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
- argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
- argNode->gtLsraInfo.srcCount = 0;
- }
-#endif // FEATURE_PUT_STRUCT_ARG_STK
- continue;
- }
-
- regNumber argReg = REG_NA;
- regMaskTP argMask = RBM_NONE;
- short regCount = 0;
- bool isOnStack = true;
- if (curArgTabEntry->regNum != REG_STK)
- {
- isOnStack = false;
- var_types argType = argNode->TypeGet();
-
-#if FEATURE_VARARG
- callHasFloatRegArgs |= varTypeIsFloating(argType);
-#endif // FEATURE_VARARG
-
- argReg = curArgTabEntry->regNum;
- regCount = 1;
-
- // Default case is that we consume one source; modify this later (e.g. for
- // promoted structs)
- info->srcCount++;
-
- argMask = genRegMask(argReg);
- argNode = argNode->gtEffectiveVal();
- }
-
-        // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
-        // Use the curArgTabEntry's isStruct to determine whether the param is a struct.
- if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct))
- {
- unsigned originalSize = 0;
- LclVarDsc* varDsc = nullptr;
- if (argNode->gtOper == GT_LCL_VAR)
- {
- varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- }
- else if (argNode->gtOper == GT_MKREFANY)
- {
- originalSize = 2 * TARGET_POINTER_SIZE;
- }
- else if (argNode->gtOper == GT_OBJ)
- {
- noway_assert(!"GT_OBJ not supported for amd64");
- }
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- else if (argNode->gtOper == GT_PUTARG_REG)
- {
- originalSize = genTypeSize(argNode->gtType);
- }
- else if (argNode->gtOper == GT_FIELD_LIST)
- {
- originalSize = 0;
-
- // There could be up to 2 PUTARG_REGs in the list
- GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
- unsigned iterationNum = 0;
- for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
- {
- GenTreePtr putArgRegNode = fieldListPtr->Current();
- assert(putArgRegNode->gtOper == GT_PUTARG_REG);
-
- if (iterationNum == 0)
- {
- varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
- originalSize = varDsc->lvSize();
- assert(originalSize != 0);
- }
- else
- {
-                        // Need an extra source for every node but the first in the list.
- info->srcCount++;
-
- // Get the mask for the second putarg_reg
- argMask = genRegMask(curArgTabEntry->otherRegNum);
- }
-
- putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
- putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
- iterationNum++;
- }
-
- assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- else
- {
- noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
- }
-
- unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
- unsigned remainingSlots = slots;
-
- if (!isOnStack)
- {
- remainingSlots = slots - 1;
-
- regNumber reg = (regNumber)(argReg + 1);
- while (remainingSlots > 0 && reg <= REG_ARG_LAST)
- {
- argMask |= genRegMask(reg);
- reg = (regNumber)(reg + 1);
- remainingSlots--;
- regCount++;
- }
- }
-
- short internalIntCount = 0;
- if (remainingSlots > 0)
- {
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
-            // This TYP_STRUCT argument is also passed in the outgoing argument area.
-            // We need a register to address the TYP_STRUCT.
- internalIntCount = 1;
-#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
- // And we may need 2
- internalIntCount = 2;
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- argNode->gtLsraInfo.internalIntCount = internalIntCount;
-
-#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
- }
- else
- {
- argNode->gtLsraInfo.setDstCandidates(l, argMask);
- argNode->gtLsraInfo.setSrcCandidates(l, argMask);
- }
-
- // To avoid redundant moves, have the argument child tree computed in the
- // register in which the argument is passed to the call.
- if (argNode->gtOper == GT_PUTARG_REG)
- {
- argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
- }
-
-#if FEATURE_VARARG
- // In the case of a varargs call, the ABI dictates that if we have floating point args,
- // we must pass the enregistered arguments in both the integer and floating point registers.
- // Since the integer register is not associated with this arg node, we will reserve it as
- // an internal register so that it is not used during the evaluation of the call node
- // (e.g. for the target).
- if (call->IsVarargs() && varTypeIsFloating(argNode))
- {
- regNumber targetReg = compiler->getCallArgIntRegister(argReg);
- info->setInternalIntCount(info->internalIntCount + 1);
- info->addInternalCandidates(l, genRegMask(targetReg));
- }
-#endif // FEATURE_VARARG
- }
-
- // Now, count stack args
- // Note that these need to be computed into a register, but then
- // they're just stored to the stack - so the reg doesn't
- // need to remain live until the call. In fact, it must not
- // because the code generator doesn't actually consider it live,
- // so it can't be spilled.
-
- GenTreePtr args = call->gtCallArgs;
- while (args)
- {
- GenTreePtr arg = args->gtOp.gtOp1;
- if (!(args->gtFlags & GTF_LATE_ARG))
- {
- TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
-#if !defined(_TARGET_64BIT_)
- if (arg->TypeGet() == TYP_LONG)
- {
- assert(arg->OperGet() == GT_LONG);
- GenTreePtr loArg = arg->gtGetOp1();
- GenTreePtr hiArg = arg->gtGetOp2();
- assert((loArg->OperGet() == GT_PUTARG_STK) && (hiArg->OperGet() == GT_PUTARG_STK));
- assert((loArg->gtLsraInfo.dstCount == 1) && (hiArg->gtLsraInfo.dstCount == 1));
- loArg->gtLsraInfo.isLocalDefUse = true;
- hiArg->gtLsraInfo.isLocalDefUse = true;
- }
- else
-#endif // !defined(_TARGET_64BIT_)
- {
- if (argInfo->dstCount != 0)
- {
- argInfo->isLocalDefUse = true;
- }
-
- // If the child of GT_PUTARG_STK is a constant, we don't need a register to
- // move it to memory (stack location).
- //
- // On AMD64, we don't want to make 0 contained, because we can generate smaller code
- // by zeroing a register and then storing it. E.g.:
- // xor rdx, rdx
- // mov gword ptr [rsp+28H], rdx
- // is 2 bytes smaller than:
- // mov gword ptr [rsp+28H], 0
- //
- // On x86, we push stack arguments; we don't use 'mov'. So:
- // push 0
- // is 1 byte smaller than:
- // xor rdx, rdx
- // push rdx
-
- argInfo->dstCount = 0;
- if (arg->gtOper == GT_PUTARG_STK)
- {
- GenTree* op1 = arg->gtOp.gtOp1;
- if (IsContainableImmed(arg, op1)
-#if defined(_TARGET_AMD64_)
- && !op1->IsIntegralConst(0)
-#endif // _TARGET_AMD64_
- )
- {
- MakeSrcContained(arg, op1);
- }
- }
- }
- }
- args = args->gtOp.gtOp2;
- }
-
-#if FEATURE_VARARG
- // If it is a fast tail call, it is already preferenced to use RAX.
-    // Therefore, there is no need to set src candidates on the call target again.
- if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
- {
- // Don't assign the call target to any of the argument registers because
- // we will use them to also pass floating point arguments as required
- // by Amd64 ABI.
- ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
- }
-#endif // FEATURE_VARARG
-}
-
//------------------------------------------------------------------------
-// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+// LowerBlockStore: Set block store type
//
// Arguments:
// blkNode - The block store node of interest
@@ -1639,25 +107,15 @@ void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+void Lowering::LowerBlockStore(GenTreeBlk* blkNode)
{
- GenTree* dstAddr = blkNode->Addr();
- unsigned size = blkNode->gtBlkSize;
- GenTree* source = blkNode->Data();
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- // Sources are dest address, initVal or source.
- // We may require an additional source or temp register for the size.
- blkNode->gtLsraInfo.srcCount = 2;
- blkNode->gtLsraInfo.dstCount = 0;
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ Compiler* compiler = comp;
GenTreePtr srcAddrOrFill = nullptr;
bool isInitBlk = blkNode->OperIsInitBlkOp();
- regMaskTP dstAddrRegMask = RBM_NONE;
- regMaskTP sourceRegMask = RBM_NONE;
- regMaskTP blkSizeRegMask = RBM_NONE;
if (!isInitBlk)
{
// CopyObj or CopyBlk
@@ -1668,20 +126,6 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (source->gtOper == GT_IND)
{
srcAddrOrFill = blkNode->Data()->gtGetOp1();
- // We're effectively setting source as contained, but can't call MakeSrcContained, because the
- // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
- // If srcAddr is already non-contained, we don't need to change it.
- if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
- {
- srcAddrOrFill->gtLsraInfo.setDstCount(1);
- srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
- }
- m_lsra->clearOperandCounts(source);
- }
- else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
- {
- assert(source->IsLocal());
- MakeSrcContained(blkNode, source);
}
}
@@ -1735,58 +179,18 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
initVal->gtIntCon.gtIconVal = 0x01010101 * fill;
#endif // !_TARGET_AMD64_
- // In case we have a buffer >= 16 bytes
- // we can use SSE2 to do a 128-bit store in a single
- // instruction.
- if (size >= XMM_REGSIZE_BYTES)
- {
- // Reserve an XMM register to fill it with
- // a pack of 16 init value constants.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
- if ((fill == 0) && ((size & 0xf) == 0))
- {
- MakeSrcContained(blkNode, source);
- }
- }
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
-
-#ifdef _TARGET_X86_
- if ((size & 1) != 0)
- {
- // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing
- // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
- // when unrolling, so only allow byteable registers as the source value. (We could
- // consider just using BlkOpKindRepInstr instead.)
- sourceRegMask = RBM_BYTE_REGS;
- }
-#endif // _TARGET_X86_
}
else
{
- // rep stos has the following register requirements:
- // a) The memory address to be in RDI.
- // b) The fill value has to be in RAX.
- // c) The buffer size will go in RCX.
- dstAddrRegMask = RBM_RDI;
- srcAddrOrFill = initVal;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
}
else
{
#ifdef _TARGET_AMD64_
- // The helper follows the regular AMD64 ABI.
- dstAddrRegMask = RBM_ARG_0;
- sourceRegMask = RBM_ARG_1;
- blkSizeRegMask = RBM_ARG_2;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
#else // !_TARGET_AMD64_
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RAX;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
#endif // !_TARGET_AMD64_
}
@@ -1870,19 +274,12 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
if (IsRepMovsProfitable)
{
// We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
else
{
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
-
- dstAddrRegMask = RBM_RDI;
-
- // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
- // sources.
- sourceRegMask = RBM_RSI;
}
else
{
@@ -1903,119 +300,31 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// our framework assemblies, so this is the main code generation scheme we'll use.
if (size <= CPBLK_UNROLL_LIMIT)
{
- // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
- //
- // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
- // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
- // RBM_NON_BYTE_REGS from internal candidates.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- blkNode->gtLsraInfo.internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
-
-#ifdef _TARGET_X86_
- if ((size % 2) != 0)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
-#endif
- blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
- }
-
- if (size >= XMM_REGSIZE_BYTES)
- {
- // If we have a buffer larger than XMM_REGSIZE_BYTES,
- // reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- blkNode->gtLsraInfo.internalFloatCount = 1;
- blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
- // If src or dst are on stack, we don't have to generate the address into a register
- // because it's just some constant+SP
- if (srcAddrOrFill != nullptr && srcAddrOrFill->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, srcAddrOrFill);
- }
-
- if (dstAddr->OperIsLocalAddr())
- {
- MakeSrcContained(blkNode, dstAddr);
- }
-
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindUnroll;
}
else
{
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RSI;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
}
#ifdef _TARGET_AMD64_
else
{
-        // If we have a constant integer size here, it means we went beyond
-        // CPBLK_MOVS_LIMIT bytes; even so, we should never have the case of
-        // any GC-Pointers in the src struct.
- blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
- dstAddrRegMask = RBM_ARG_0;
- sourceRegMask = RBM_ARG_1;
- blkSizeRegMask = RBM_ARG_2;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindHelper;
}
#elif defined(_TARGET_X86_)
else
{
- dstAddrRegMask = RBM_RDI;
- sourceRegMask = RBM_RSI;
- blkSizeRegMask = RBM_RCX;
blkNode->gtBlkOpKind = GenTreeBlk::BlkOpKindRepInstr;
}
#endif // _TARGET_X86_
assert(blkNode->gtBlkOpKind != GenTreeBlk::BlkOpKindInvalid);
}
- if (dstAddrRegMask != RBM_NONE)
- {
- dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
- }
- if (sourceRegMask != RBM_NONE)
- {
- if (srcAddrOrFill != nullptr)
- {
- srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
- }
- else
- {
- // This is a local source; we'll use a temp register for its address.
- blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
- }
- }
- if (blkSizeRegMask != RBM_NONE)
- {
- if (size != 0)
- {
- // Reserve a temp register for the block size argument.
- blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
- blkNode->gtLsraInfo.internalIntCount++;
- }
- else
- {
- // The block size argument is a third argument to GT_STORE_DYN_BLK
- noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
- blkNode->gtLsraInfo.setSrcCount(3);
- GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
- blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
- }
- }
}
#ifdef FEATURE_PUT_STRUCT_ARG_STK
//------------------------------------------------------------------------
-// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+// LowerPutArgStk: Lower a GT_PUTARG_STK.
//
// Arguments:
// tree - The node of interest
@@ -2023,11 +332,8 @@ void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
// Return Value:
// None.
//
-void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+void Lowering::LowerPutArgStk(GenTreePutArgStk* putArgStk)
{
- TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
- LinearScan* l = m_lsra;
-
#ifdef _TARGET_X86_
if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
{
@@ -2070,9 +376,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
fieldCount++;
}
- info->srcCount = fieldCount;
- info->dstCount = 0;
-
// In theory, the upper bound for the size of a field list is 8: these constructs only appear when passing the
// collection of lclVars that represent the fields of a promoted struct lclVar, and we do not promote struct
// lclVars with more than 4 fields. If each of these lclVars is of type long, decomposition will split the
@@ -2103,9 +406,8 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
putArgStk->gtOp1 = fieldList;
}
- // Now that the fields have been sorted, initialize the LSRA info.
+    // Now that the fields have been sorted, determine the kind of code we will generate.
bool allFieldsAreSlots = true;
- bool needsByteTemp = false;
unsigned prevOffset = putArgStk->getArgSize();
for (GenTreeFieldList* current = fieldList; current != nullptr; current = current->Rest())
{
@@ -2114,56 +416,12 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
const unsigned fieldOffset = current->gtFieldOffset;
assert(fieldType != TYP_LONG);
- // For x86 we must mark all integral fields as contained or reg-optional, and handle them
- // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
- // registers to be consumed atomically by the call.
- if (varTypeIsIntegralOrI(fieldNode))
- {
- if (fieldNode->OperGet() == GT_LCL_VAR)
- {
- LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
- if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
- {
- SetRegOptional(fieldNode);
- }
- else
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- }
- else if (fieldNode->IsIntCnsFitsInI32())
- {
- MakeSrcContained(putArgStk, fieldNode);
- }
- else
- {
- // For the case where we cannot directly push the value, if we run out of registers,
- // it would be better to defer computation until we are pushing the arguments rather
- // than spilling, but this situation is not all that common, as most cases of promoted
- // structs do not have a large number of fields, and of those most are lclVars or
- // copy-propagated constants.
- SetRegOptional(fieldNode);
- }
- }
- else
- {
- assert(varTypeIsFloating(fieldNode));
- }
-
// We can treat as a slot any field that is stored at a slot boundary, where the previous
// field is not in the same slot. (Note that we store the fields in reverse order.)
const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
if (!fieldIsSlot)
{
allFieldsAreSlots = false;
- if (varTypeIsByte(fieldType))
- {
- // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
- // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
- // need a byte-addressable register for the store. We will enforce this requirement on an internal
- // register, which we can use to copy multiple byte values.
- needsByteTemp = true;
- }
}
if (varTypeIsGC(fieldType))
@@ -2187,35 +445,13 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
else
{
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::Push;
- // If any of the fields cannot be stored with an actual push, we may need a temporary
- // register to load the value before storing it to the stack location.
- info->internalIntCount = 1;
- regMaskTP regMask = l->allRegs(TYP_INT);
- if (needsByteTemp)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
- info->setInternalCandidates(l, regMask);
}
return;
}
#endif // _TARGET_X86_
-#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
- // For PutArgStk of a TYP_SIMD12, we need an extra register.
- if (putArgStk->TypeGet() == TYP_SIMD12)
- {
- info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
- info->dstCount = 0;
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->allSIMDRegs());
- return;
- }
-#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
-
if (putArgStk->TypeGet() != TYP_STRUCT)
{
- TreeNodeInfoInitSimple(putArgStk);
return;
}
@@ -2223,21 +459,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
GenTreePtr src = putArgStk->gtOp1;
GenTreePtr srcAddr = nullptr;
- bool haveLocalAddr = false;
- if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
- {
- srcAddr = src->gtOp.gtOp1;
- assert(srcAddr != nullptr);
- haveLocalAddr = srcAddr->OperIsLocalAddr();
- }
- else
- {
- assert(varTypeIsSIMD(putArgStk));
- }
-
- info->srcCount = src->gtLsraInfo.dstCount;
- info->dstCount = 0;
-
// In case of a CpBlk we could use a helper call. In case of putarg_stk we
// can't do that since the helper call could kill some already set up outgoing args.
// TODO-Amd64-Unix: converge the code for putarg_stk with cpyblk/cpyobj.
@@ -2257,38 +478,6 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
// our framework assemblies, so this is the main code generation scheme we'll use.
if (size <= CPBLK_UNROLL_LIMIT && putArgStk->gtNumberReferenceSlots == 0)
{
- // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
- //
- // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
- // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
- // RBM_NON_BYTE_REGS from internal candidates.
- if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
- {
- info->internalIntCount++;
- regMaskTP regMask = l->allRegs(TYP_INT);
-
-#ifdef _TARGET_X86_
- if ((size % 2) != 0)
- {
- regMask &= ~RBM_NON_BYTE_REGS;
- }
-#endif
- info->setInternalCandidates(l, regMask);
- }
-
-#ifdef _TARGET_X86_
- if (size >= 8)
-#else // !_TARGET_X86_
- if (size >= XMM_REGSIZE_BYTES)
-#endif // !_TARGET_X86_
- {
- // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
- // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
- // series of 16-byte loads and stores.
- info->internalFloatCount = 1;
- info->addInternalCandidates(l, l->internalFloatRegCandidates());
- }
-
#ifdef _TARGET_X86_
if (size < XMM_REGSIZE_BYTES)
{
@@ -2310,1486 +499,11 @@ void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
#endif // _TARGET_X86_
else
{
- info->internalIntCount += 3;
- info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
-
putArgStk->gtPutArgStkKind = GenTreePutArgStk::Kind::RepInstr;
}
-
- // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
- MakeSrcContained(putArgStk, src);
-
- if (haveLocalAddr)
- {
- // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
- // copies.
- //
- // To avoid an assertion in MakeSrcContained, increment the parent's source count beforehand and decrement it
- // afterwards.
- info->srcCount++;
- MakeSrcContained(putArgStk, srcAddr);
- info->srcCount--;
- }
}
#endif // FEATURE_PUT_STRUCT_ARG_STK
-//------------------------------------------------------------------------
-// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
- Compiler* compiler = comp;
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
- // Here '-' means don't care.
- //
-    //  Size?                    Init Memory?    # temp regs
-    //  0                        -               0 (returns 0)
-    //  const and <=6 reg words  -               0 (pushes '0')
-    //  const and >6 reg words   Yes             0 (pushes '0')
-    //  const and <PageSize      No              0 (amd64) 1 (x86)
-    //                                             (x86: tmpReg for subtracting from esp)
-    //  const and >=PageSize     No              2 (regCnt and tmpReg for subtracting from sp)
-    //  Non-const                Yes             0 (regCnt=targetReg and pushes '0')
-    //  Non-const                No              2 (regCnt and tmpReg for subtracting from sp)
- //
-    // Note: Here we don't need the internal register to be different from targetReg.
-    // Rather, we require it to be different from the operand's reg.
-
- GenTreePtr size = tree->gtOp.gtOp1;
- if (size->IsCnsIntOrI())
- {
- MakeSrcContained(tree, size);
-
- size_t sizeVal = size->gtIntCon.gtIconVal;
-
- if (sizeVal == 0)
- {
- info->internalIntCount = 0;
- }
- else
- {
- // Compute the amount of memory to properly STACK_ALIGN.
- // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
- // This should also help in debugging as we can examine the original size specified with localloc.
- sizeVal = AlignUp(sizeVal, STACK_ALIGN);
-
- // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
- // we will generate 'push 0'.
- assert((sizeVal % REGSIZE_BYTES) == 0);
- size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
- if (cntRegSizedWords <= 6)
- {
- info->internalIntCount = 0;
- }
- else if (!compiler->info.compInitMem)
- {
- // No need to initialize allocated stack space.
- if (sizeVal < compiler->eeGetPageSize())
- {
-#ifdef _TARGET_X86_
- info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
-#else // !_TARGET_X86_
- info->internalIntCount = 0;
-#endif // !_TARGET_X86_
- }
- else
- {
- // We need two registers: regCnt and RegTmp
- info->internalIntCount = 2;
- }
- }
- else
- {
- // >6 and need to zero initialize allocated stack space.
- info->internalIntCount = 0;
- }
- }
- }
- else
- {
- if (!compiler->info.compInitMem)
- {
- info->internalIntCount = 2;
- }
- else
- {
- info->internalIntCount = 0;
- }
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitLogicalOp: Set the NodeInfo for GT_AND/GT_OR/GT_XOR,
-// as well as GT_ADD/GT_SUB.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
-    // We're not marking a constant hanging on the left of the add
-    // as containable, so we assign it to a register, which has a CQ impact.
-    // TODO-XArch-CQ: Detect this case and generate a single instruction
-    // for GT_ADD(Constant, SomeTree).
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- // We can directly encode the second operand if it is either a containable constant or a memory-op.
-    // In the case of a memory-op, we can encode it directly provided its type matches the type of 'tree'.
-    // This is because during codegen, the type of 'tree' is used to determine the emit type size. If the types
- // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
- bool directlyEncodable = false;
- bool binOpInRMW = false;
- GenTreePtr operand = nullptr;
-
- if (IsContainableImmed(tree, op2))
- {
- directlyEncodable = true;
- operand = op2;
- }
- else
- {
- binOpInRMW = IsBinOpInRMWStoreInd(tree);
- if (!binOpInRMW)
- {
- if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
- {
- directlyEncodable = true;
- operand = op2;
- }
- else if (tree->OperIsCommutative())
- {
- if (IsContainableImmed(tree, op1) ||
- (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
- {
- // If it is safe, we can reverse the order of operands of commutative operations for efficient
- // codegen
- directlyEncodable = true;
- operand = op1;
- }
- }
- }
- }
-
- if (directlyEncodable)
- {
- assert(operand != nullptr);
- MakeSrcContained(tree, operand);
- }
- else if (!binOpInRMW)
- {
- // If this binary op neither has contained operands, nor is a
- // Read-Modify-Write (RMW) operation, we can mark its operands
- // as reg optional.
- SetRegOptionalForBinOp(tree);
- }
-
- // Codegen of this tree node sets ZF and SF flags.
- tree->gtFlags |= GTF_ZSF_SET;
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- switch (tree->OperGet())
- {
- case GT_MOD:
- case GT_DIV:
- if (varTypeIsFloating(tree->TypeGet()))
- {
- // No implicit conversions at this stage as the expectation is that
- // everything is made explicit by adding casts.
- assert(op1->TypeGet() == op2->TypeGet());
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- // SSE2 allows only op2 to be a memory-op.
- SetRegOptional(op2);
- }
-
- return;
- }
- break;
-
- default:
- break;
- }
-
-    // Amd64 Div/Idiv instruction:
-    //    Dividend in RDX:RAX, and computes
-    //    Quotient in RAX, Remainder in RDX
-
- if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
- {
- // We are interested in just the remainder.
- // RAX is used as a trashable register during computation of remainder.
- info->setDstCandidates(l, RBM_RDX);
- }
- else
- {
- // We are interested in just the quotient.
- // RDX gets used as trashable register during computation of quotient
- info->setDstCandidates(l, RBM_RAX);
- }
-
- bool op2CanBeRegOptional = true;
-#ifdef _TARGET_X86_
- if (op1->OperGet() == GT_LONG)
- {
-        // To avoid a register move, we would like to have op1's low part in RAX and its high part in RDX.
- GenTree* loVal = op1->gtGetOp1();
- GenTree* hiVal = op1->gtGetOp2();
-
- // Src count is actually 3, so increment.
- assert(op2->IsCnsIntOrI());
- assert(tree->OperGet() == GT_UMOD);
- info->srcCount++;
- op2CanBeRegOptional = false;
-
- // This situation also requires an internal register.
- info->internalIntCount = 1;
- info->setInternalCandidates(l, l->allRegs(TYP_INT));
-
- loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX);
- hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX);
- }
- else
-#endif
- {
-        // If possible, we would like to have op1 in RAX to avoid a register move.
- op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
- }
-
- // divisor can be an r/m, but the memory indirection must be of the same size as the divide
- if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
- {
- MakeSrcContained(tree, op2);
- }
- else if (op2CanBeRegOptional)
- {
- op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
-
- // If there are no containable operands, we can make an operand reg optional.
- // Div instruction allows only op2 to be a memory op.
- SetRegOptional(op2);
- }
-}
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* l = m_lsra;
-
- // Both operand and its result must be of floating point type.
- GenTree* op1 = tree->gtGetOp1();
- assert(varTypeIsFloating(op1));
- assert(op1->TypeGet() == tree->TypeGet());
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- switch (tree->gtIntrinsic.gtIntrinsicId)
- {
- case CORINFO_INTRINSIC_Sqrt:
- if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op1);
- }
- else
- {
- // Mark the operand as reg optional since codegen can still
- // generate code if op1 is on stack.
- SetRegOptional(op1);
- }
- break;
-
- case CORINFO_INTRINSIC_Abs:
- // Abs(float x) = x & 0x7fffffff
-            // Abs(double x) = x & 0x7fffffff ffffffff
-
- // In case of Abs we need an internal register to hold mask.
-
- // TODO-XArch-CQ: avoid using an internal register for the mask.
- // Andps or andpd both will operate on 128-bit operands.
-            // The data section constant to hold the mask is 64 bits in size.
- // Therefore, we need both the operand and mask to be in
- // xmm register. When we add support in emitter to emit 128-bit
- // data constants and instructions that operate on 128-bit
- // memory operands we can avoid the need for an internal register.
- if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(l, l->internalFloatRegCandidates());
- }
- break;
-
-#ifdef _TARGET_X86_
- case CORINFO_INTRINSIC_Cos:
- case CORINFO_INTRINSIC_Sin:
- case CORINFO_INTRINSIC_Round:
- NYI_X86("Math intrinsics Cos, Sin and Round");
- break;
-#endif // _TARGET_X86_
-
- default:
- // Right now only Sqrt/Abs are treated as math intrinsics
- noway_assert(!"Unsupported math intrinsic");
- unreached();
- break;
- }
-}
-
-#ifdef FEATURE_SIMD
-//------------------------------------------------------------------------
-// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
-//
-// Arguments:
-// tree - The GT_SIMD node of interest
-//
-// Return Value:
-// None.
-
-void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
-{
- GenTreeSIMD* simdTree = tree->AsSIMD();
- TreeNodeInfo* info = &(tree->gtLsraInfo);
- LinearScan* lsra = m_lsra;
- info->dstCount = 1;
- switch (simdTree->gtSIMDIntrinsicID)
- {
- GenTree* op1;
- GenTree* op2;
-
- case SIMDIntrinsicInit:
- {
- info->srcCount = 1;
- op1 = tree->gtOp.gtOp1;
-
- // This sets all fields of a SIMD struct to the given value.
- // Mark op1 as contained if it is either zero or int constant of all 1's,
- // or a float constant with 16 or 32 byte simdType (AVX case)
- //
- // Should never see small int base type vectors except for zero initialization.
- assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
-
- if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
- (varTypeIsIntegral(simdTree->gtSIMDBaseType) && op1->IsIntegralConst(-1)))
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- info->srcCount = 0;
- }
- else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
- ((simdTree->gtSIMDSize == 16) || (simdTree->gtSIMDSize == 32)))
- {
- // Either op1 is a float or dbl constant or an addr
- if (op1->IsCnsFltOrDbl() || op1->OperIsLocalAddr())
- {
- MakeSrcContained(tree, tree->gtOp.gtOp1);
- info->srcCount = 0;
- }
- }
- }
- break;
-
- case SIMDIntrinsicInitN:
- {
- info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
-
- // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicInitArray:
- // We have an array and an index, which may be contained.
- info->srcCount = 2;
- CheckImmedAndMakeContained(tree, tree->gtGetOp2());
- break;
-
- case SIMDIntrinsicDiv:
- // SSE2 has no instruction support for division on integer vectors
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicAbs:
- // This gets implemented as bitwise-And operation with a mask
- // and hence should never see it here.
- unreached();
- break;
-
- case SIMDIntrinsicSqrt:
- // SSE2 has no instruction support for sqrt on integer vectors.
- noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicAdd:
- case SIMDIntrinsicSub:
- case SIMDIntrinsicMul:
- case SIMDIntrinsicBitwiseAnd:
- case SIMDIntrinsicBitwiseAndNot:
- case SIMDIntrinsicBitwiseOr:
- case SIMDIntrinsicBitwiseXor:
- case SIMDIntrinsicMin:
- case SIMDIntrinsicMax:
- info->srcCount = 2;
-
- // SSE2 32-bit integer multiplication requires two temp regs
- if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
- comp->getSIMDInstructionSet() == InstructionSet_SSE2)
- {
- info->internalFloatCount = 2;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
-
- case SIMDIntrinsicEqual:
- info->srcCount = 2;
- break;
-
- // SSE2 doesn't support < and <= directly on int vectors.
- // Instead we need to use > and >= with swapped operands.
- case SIMDIntrinsicLessThan:
- case SIMDIntrinsicLessThanOrEqual:
- info->srcCount = 2;
- noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
- break;
-
-        // SIMDIntrinsicGreaterThan is supported only on non-floating point base type vectors.
- // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
- // Instead we need to use < and <= with swapped operands.
- case SIMDIntrinsicGreaterThan:
- noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicOpEquality:
- case SIMDIntrinsicOpInEquality:
- info->srcCount = 2;
-
- // On SSE4/AVX, we can generate optimal code for (in)equality
-            // against zero using ptest. We can safely do this optimization
-            // for integral vectors but not for floating-point vectors, because
-            // +0.0 and -0.0 have different bit patterns yet compare equal.
- op2 = tree->gtGetOp2();
- if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
- {
- MakeSrcContained(tree, op2);
- }
- else
- {
-
- // Need one SIMD register as scratch.
- // See genSIMDIntrinsicRelOp() for details on code sequence generated and
- // the need for one scratch register.
- //
- // Note these intrinsics produce a BOOL result, hence internal float
- // registers reserved are guaranteed to be different from target
- // integer register without explicitly specifying.
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- break;
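
The restriction to integral vectors above comes down to signed zero: ptest is a bitwise check, while floating-point equality treats +0.0 and -0.0 as equal even though their bit patterns differ. A small scalar demonstration of the mismatch (standalone sketch, not JIT code):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        float pz = +0.0f, nz = -0.0f;
        uint32_t pzBits, nzBits;
        std::memcpy(&pzBits, &pz, sizeof(pz));
        std::memcpy(&nzBits, &nz, sizeof(nz));
        // Prints: equal=1 sameBits=0 -- so a bitwise zero test is not a valid
        // equality test for float vectors, only for integral ones.
        std::printf("equal=%d sameBits=%d\n", pz == nz, pzBits == nzBits);
        return 0;
    }
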
-
- case SIMDIntrinsicDotProduct:
- // Float/Double vectors:
- // For SSE, or AVX with 32-byte vectors, we also need an internal register
- // as scratch. Further we need the targetReg and internal reg to be distinct
- // registers. Note that if this is a TYP_SIMD16 or smaller on AVX, then we
- // don't need a tmpReg.
- //
- // 32-byte integer vector on SSE4/AVX:
- // will take advantage of phaddd, which operates only on 128-bit xmm reg.
- // This will need 1 (in case of SSE4) or 2 (in case of AVX) internal
- // registers since targetReg is an int type register.
- //
- // See genSIMDIntrinsicDotProduct() for details on code sequence generated
- // and the need for scratch registers.
- if (varTypeIsFloating(simdTree->gtSIMDBaseType))
- {
- if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
- (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
- {
- info->internalFloatCount = 1;
- info->isInternalRegDelayFree = true;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- // else don't need scratch reg(s).
- }
- else
- {
- assert(simdTree->gtSIMDBaseType == TYP_INT && comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
-
-            // No need to set isInternalRegDelayFree since targetReg is
-            // an int type reg and guaranteed to be different from xmm/ymm
- // regs.
- info->internalFloatCount = comp->canUseAVX() ? 2 : 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- info->srcCount = 2;
- break;
-
- case SIMDIntrinsicGetItem:
- {
- // This implements get_Item method. The sources are:
- // - the source SIMD struct
- // - index (which element to get)
- // The result is baseType of SIMD struct.
- info->srcCount = 2;
- op1 = tree->gtOp.gtOp1;
- op2 = tree->gtOp.gtOp2;
-
- // If the index is a constant, mark it as contained.
- if (CheckImmedAndMakeContained(tree, op2))
- {
- info->srcCount = 1;
- }
-
- if (op1->isMemoryOp())
- {
- MakeSrcContained(tree, op1);
-
- // Although GT_IND of TYP_SIMD12 reserves an internal float
- // register for reading 4 and 8 bytes from memory and
- // assembling them into target XMM reg, it is not required
- // in this case.
- op1->gtLsraInfo.internalIntCount = 0;
- op1->gtLsraInfo.internalFloatCount = 0;
- }
- else
- {
- // If the index is not a constant, we will use the SIMD temp location to store the vector.
- // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
- // can use that in the process of extracting the element.
- //
- // If the index is a constant and base type is a small int we can use pextrw, but on AVX
-                // we will need a temp if we are indexing into the upper half of the AVX register.
- // In all other cases with constant index, we need a temp xmm register to extract the
- // element if index is other than zero.
-
- if (!op2->IsCnsIntOrI())
- {
- (void)comp->getSIMDInitTempVarNum();
- }
- else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
- {
- bool needFloatTemp;
- if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
- (comp->getSIMDInstructionSet() == InstructionSet_AVX))
- {
- int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
- needFloatTemp = (byteShiftCnt >= 16);
- }
- else
- {
- needFloatTemp = !op2->IsIntegralConst(0);
- }
-
- if (needFloatTemp)
- {
- info->internalFloatCount = 1;
- info->setInternalCandidates(lsra, lsra->allSIMDRegs());
- }
- }
- }
- }
- break;
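
As a rough sketch of the constant-index decision above for a non-floating base type (a hypothetical helper, not the JIT's actual code): on AVX with a small-int base type, a float temp is only needed when the selected lane's byte offset reaches the upper 128-bit half of the YMM register; otherwise any non-zero index needs a temp.

    // Hypothetical helper mirroring the logic above; index/elemSize are assumed inputs.
    bool NeedsFloatTempForGetItem(bool isAVX, bool smallIntBase, int index, int elemSize)
    {
        if (isAVX && smallIntBase)
        {
            int byteShiftCnt = index * elemSize;
            return byteShiftCnt >= 16; // lane lives in the upper half of the YMM register
        }
        return index != 0; // lane 0 can be read without shuffling
    }
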
-
- case SIMDIntrinsicSetX:
- case SIMDIntrinsicSetY:
- case SIMDIntrinsicSetZ:
- case SIMDIntrinsicSetW:
- info->srcCount = 2;
-
- // We need an internal integer register for SSE2 codegen
- if (comp->getSIMDInstructionSet() == InstructionSet_SSE2)
- {
- info->internalIntCount = 1;
- info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
- }
-
- break;
-
- case SIMDIntrinsicCast:
- info->srcCount = 1;
- break;
-
- case SIMDIntrinsicShuffleSSE2:
- info->srcCount = 2;
- // Second operand is an integer constant and marked as contained.
- op2 = tree->gtOp.gtOp2;
- noway_assert(op2->IsCnsIntOrI());
- MakeSrcContained(tree, op2);
- break;
-
- case SIMDIntrinsicGetX:
- case SIMDIntrinsicGetY:
- case SIMDIntrinsicGetZ:
- case SIMDIntrinsicGetW:
- case SIMDIntrinsicGetOne:
- case SIMDIntrinsicGetZero:
- case SIMDIntrinsicGetCount:
- case SIMDIntrinsicGetAllOnes:
- assert(!"Get intrinsics should not be seen during Lowering.");
- unreached();
-
- default:
- noway_assert(!"Unimplemented SIMD node type.");
- unreached();
- }
-}
-#endif // FEATURE_SIMD
-
-//------------------------------------------------------------------------
-// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST.
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// None.
-//
-void Lowering::TreeNodeInfoInitCast(GenTree* tree)
-{
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
- // see CodeGen::genIntToIntCast()
-
- info->srcCount = 1;
- info->dstCount = 1;
-
- // Non-overflow casts to/from float/double are done using SSE2 instructions
-    // which allow the source operand to be either a reg or a memop. Given the
- // fact that casts from small int to float/double are done as two-level casts,
- // the source operand is always guaranteed to be of size 4 or 8 bytes.
- var_types castToType = tree->CastToType();
- GenTreePtr castOp = tree->gtCast.CastOp();
- var_types castOpType = castOp->TypeGet();
- if (tree->gtFlags & GTF_UNSIGNED)
- {
- castOpType = genUnsignedType(castOpType);
- }
-
- if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
- {
-#ifdef DEBUG
- // If converting to float/double, the operand must be 4 or 8 byte in size.
- if (varTypeIsFloating(castToType))
- {
- unsigned opSize = genTypeSize(castOpType);
- assert(opSize == 4 || opSize == 8);
- }
-#endif // DEBUG
-
- // U8 -> R8 conversion requires that the operand be in a register.
- if (castOpType != TYP_ULONG)
- {
- if (castOp->isMemoryOp() || castOp->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, castOp);
- }
- else
- {
- // Mark castOp as reg optional to indicate codegen
- // can still generate code if it is on stack.
- SetRegOptional(castOp);
- }
- }
- }
-
-#if !defined(_TARGET_64BIT_)
- if (varTypeIsLong(castOpType))
- {
- noway_assert(castOp->OperGet() == GT_LONG);
- info->srcCount = 2;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- // some overflow checks need a temp reg:
- // - GT_CAST from INT64/UINT64 to UINT32
- if (tree->gtOverflow() && (castToType == TYP_UINT))
- {
- if (genTypeSize(castOpType) == 8)
- {
-            // Here we don't need the internal register to be different from targetReg;
-            // rather, it must be different from the operand's reg.
- info->internalIntCount = 1;
- }
- }
-}
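
A small illustration of the "two-level cast" mentioned above: a small-int source is first widened to a 4-byte int, and only the int-to-float step uses the SSE2 conversion, so the float conversion never sees a 1- or 2-byte operand (a conceptual sketch, not the JIT's tree shapes):

    #include <cstdint>

    // Conceptually what GT_CAST(float <- byte) becomes after morph/lowering:
    // GT_CAST(float <- GT_CAST(int <- byte)).
    float ByteToFloat(int8_t b)
    {
        int widened = static_cast<int>(b);  // level 1: small int -> 4-byte int
        return static_cast<float>(widened); // level 2: int -> float (typically cvtsi2ss)
    }
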
-
-void Lowering::LowerGCWriteBarrier(GenTree* tree)
-{
- assert(tree->OperGet() == GT_STOREIND);
-
- GenTreeStoreInd* dst = tree->AsStoreInd();
- GenTreePtr addr = dst->Addr();
- GenTreePtr src = dst->Data();
-
- if (addr->OperGet() == GT_LEA)
- {
- // In the case where we are doing a helper assignment, if the dst
- // is an indir through an lea, we need to actually instantiate the
- // lea in a register
- GenTreeAddrMode* lea = addr->AsAddrMode();
-
- int leaSrcCount = 0;
- if (lea->HasBase())
- {
- leaSrcCount++;
- }
- if (lea->HasIndex())
- {
- leaSrcCount++;
- }
- lea->gtLsraInfo.srcCount = leaSrcCount;
- lea->gtLsraInfo.dstCount = 1;
- }
-
- bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
-
-#if NOGC_WRITE_BARRIERS
-
-#if defined(_TARGET_X86_)
-
- useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default.
-#ifdef DEBUG
- GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
- if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
- {
- useOptimizedWriteBarrierHelper = false;
- }
-#endif
-
- if (useOptimizedWriteBarrierHelper)
- {
- // Special write barrier:
- // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
- // op2 (src) goes into any int register.
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC);
- }
-
-#else // !defined(_TARGET_X86_)
-#error "NOGC_WRITE_BARRIERS is not supported"
-#endif // !defined(_TARGET_X86_)
-
-#endif // NOGC_WRITE_BARRIERS
-
- if (!useOptimizedWriteBarrierHelper)
- {
- // For the standard JIT Helper calls:
- // op1 (addr) goes into REG_ARG_0 and
- // op2 (src) goes into REG_ARG_1
- addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
- src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
- }
-
- // Both src and dst must reside in a register, which they should since we haven't set
- // either of them as contained.
- assert(addr->gtLsraInfo.dstCount == 1);
- assert(src->gtLsraInfo.dstCount == 1);
-}
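
For context on the register candidates chosen above: a GC write barrier is a helper that performs the reference store and then records it for the GC, typically by dirtying a card covering the destination. A very rough conceptual sketch; the card table name and shift below are assumptions, not CoreCLR's actual helper:

    #include <cstddef>
    #include <cstdint>

    extern uint8_t* gCardTable; // hypothetical card table

    void WriteBarrier(void** dst, void* src)
    {
        *dst = src;                                         // the actual reference store
        size_t card = reinterpret_cast<size_t>(dst) >> 11;  // find the card covering 'dst' (assumed shift)
        gCardTable[card] = 0xFF;                            // mark it dirty for the next GC scan
    }
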
-
-//-----------------------------------------------------------------------------------------
-// Specify register requirements for address expression of an indirection operation.
-//
-// Arguments:
-// indirTree - GT_IND or GT_STOREIND gentree node
-//
-void Lowering::SetIndirAddrOpCounts(GenTreePtr indirTree)
-{
- assert(indirTree->isIndir());
- // If this is the rhs of a block copy (i.e. non-enregisterable struct),
- // it has no register requirements.
- if (indirTree->TypeGet() == TYP_STRUCT)
- {
- return;
- }
-
- GenTreePtr addr = indirTree->gtGetOp1();
- TreeNodeInfo* info = &(indirTree->gtLsraInfo);
-
- GenTreePtr base = nullptr;
- GenTreePtr index = nullptr;
- unsigned mul, cns;
- bool rev;
-
-#ifdef FEATURE_SIMD
- // If indirTree is of TYP_SIMD12, don't mark addr as contained
-    // so that it always gets computed into a register. This would
- // mean codegen side logic doesn't need to handle all possible
- // addr expressions that could be contained.
- //
- // TODO-XArch-CQ: handle other addr mode expressions that could be marked
- // as contained.
- if (indirTree->TypeGet() == TYP_SIMD12)
- {
- // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
- // To assemble the vector properly we would need an additional
- // XMM register.
- info->internalFloatCount = 1;
-
- // In case of GT_IND we need an internal register different from targetReg and
- // both of the registers are used at the same time.
- if (indirTree->OperGet() == GT_IND)
- {
- info->isInternalRegDelayFree = true;
- }
-
- info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
-
- return;
- }
-#endif // FEATURE_SIMD
-
- if ((indirTree->gtFlags & GTF_IND_REQ_ADDR_IN_REG) != 0)
- {
- // The address of an indirection that requires its address in a reg.
- // Skip any further processing that might otherwise make it contained.
- }
- else if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
- {
- // These nodes go into an addr mode:
- // - GT_CLS_VAR_ADDR turns into a constant.
- // - GT_LCL_VAR_ADDR is a stack addr mode.
-
- // make this contained, it turns into a constant that goes into an addr mode
- MakeSrcContained(indirTree, addr);
- }
- else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))
- {
- // Amd64:
- // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address.
-        // (i.e. those VSD calls for which the stub addr is known at JIT compile time). In this case,
- // VM requires us to pass stub addr in REG_VIRTUAL_STUB_PARAM - see LowerVirtualStubCall(). For
- // that reason we cannot mark such an addr as contained. Note that this is not an issue for
- // indirect VSD calls since morphArgs() is explicitly materializing hidden param as a non-standard
- // argument.
- //
- // Workaround:
- // Note that LowerVirtualStubCall() sets addr->gtRegNum to REG_VIRTUAL_STUB_PARAM and Lowering::doPhase()
- // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling
- // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained
- // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround
- // an explicit check is made here.
- //
- // On x86, direct VSD is done via a relative branch, and in fact it MUST be contained.
- MakeSrcContained(indirTree, addr);
- }
- else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
- {
- MakeSrcContained(indirTree, addr);
- }
- else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
- !AreSourcesPossiblyModifiedLocals(indirTree, base, index))
- {
- // An addressing mode will be constructed that may cause some
- // nodes to not need a register, and cause others' lifetimes to be extended
- // to the GT_IND or even its parent if it's an assignment
-
- assert(base != addr);
- m_lsra->clearOperandCounts(addr);
-
- const bool hasBase = base != nullptr;
- const bool hasIndex = index != nullptr;
- assert(hasBase || hasIndex); // At least one of a base or an index must be present.
-
- // If the addressing mode has both a base and an index, bump its source count by one. If it only has one or the
- // other, its source count is already correct (due to the source for the address itself).
- if (hasBase && hasIndex)
- {
- info->srcCount++;
- }
-
- // Traverse the computation below GT_IND to find the operands
- // for the addressing mode, marking the various constants and
- // intermediate results as not consuming/producing.
- // If the traversal were more complex, we might consider using
- // a traversal function, but the addressing mode is only made
- // up of simple arithmetic operators, and the code generator
- // only traverses one leg of each node.
-
- bool foundBase = !hasBase;
- bool foundIndex = !hasIndex;
- for (GenTree *child = addr, *nextChild = nullptr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
- {
- nextChild = nullptr;
- GenTree* op1 = child->gtOp.gtOp1;
- GenTree* op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
-
- if (op1 == base)
- {
- foundBase = true;
- }
- else if (op1 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op1);
- if (!op1->OperIsLeaf())
- {
- nextChild = op1;
- }
- }
-
- if (op2 != nullptr)
- {
- if (op2 == base)
- {
- foundBase = true;
- }
- else if (op2 == index)
- {
- foundIndex = true;
- }
- else
- {
- m_lsra->clearOperandCounts(op2);
- if (!op2->OperIsLeaf())
- {
- assert(nextChild == nullptr);
- nextChild = op2;
- }
- }
- }
- }
- assert(foundBase && foundIndex);
- }
- else if (addr->gtOper == GT_ARR_ELEM)
- {
- // The GT_ARR_ELEM consumes all the indices and produces the offset.
- // The array object lives until the mem access.
- // We also consume the target register to which the address is
- // computed
-
- info->srcCount++;
- assert(addr->gtLsraInfo.srcCount >= 2);
- addr->gtLsraInfo.srcCount -= 1;
- }
-}
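
As a concrete picture of what genCreateAddrMode produces above: an address expression such as p + i*8 + 0x10 splits into base=p, index=i, mul=8 and a constant displacement, and only the base and index end up needing registers. A scalar sketch (ordinary C++ pointer arithmetic, not JIT IR):

    // p[i + 2] folds into a single x64 operand, e.g. mov rax, qword ptr [rdi + rsi*8 + 0x10]
    // i.e. base=p, index=i, mul=8, cns=0x10 -- only p and i consume registers.
    long long LoadElement(long long* p, long long i)
    {
        return p[i + 2];
    }
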
-
-void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
-{
- assert(tree->OperIsCompare());
-
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
-
-#ifdef _TARGET_X86_
- // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
- // to store the result into the low byte of a register, which requires the dst be a byteable register.
-    // We always set the dst candidates, though, because if this compare is consumed by a jump, they
- // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
- // that flag is maintained until this location (especially for decomposed long compares).
- info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
-#endif // _TARGET_X86_
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
- var_types op1Type = op1->TypeGet();
- var_types op2Type = op2->TypeGet();
-
-#if !defined(_TARGET_64BIT_)
- // Long compares will consume GT_LONG nodes, each of which produces two results.
- // Thus for each long operand there will be an additional source.
- // TODO-X86-CQ: Mark hiOp2 and loOp2 as contained if it is a constant or a memory op.
- if (varTypeIsLong(op1Type))
- {
- info->srcCount++;
- }
- if (varTypeIsLong(op2Type))
- {
- info->srcCount++;
- }
-#endif // !defined(_TARGET_64BIT_)
-
- // If either of op1 or op2 is floating point values, then we need to use
- // ucomiss or ucomisd to compare, both of which support the following form:
- // ucomis[s|d] xmm, xmm/mem
-    // That is, only the second operand can be a memory op.
- //
- // Second operand is a memory Op: Note that depending on comparison operator,
- // the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or
- // op2 can be a memory op depending on the comparison operator.
- if (varTypeIsFloating(op1Type))
- {
-        // The types of the operands have to be the same, with no implicit conversions at this stage.
- assert(op1Type == op2Type);
-
- bool reverseOps;
- if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
- {
- // Unordered comparison case
- reverseOps = (tree->gtOper == GT_GT || tree->gtOper == GT_GE);
- }
- else
- {
- reverseOps = (tree->gtOper == GT_LT || tree->gtOper == GT_LE);
- }
-
- GenTreePtr otherOp;
- if (reverseOps)
- {
- otherOp = op1;
- }
- else
- {
- otherOp = op2;
- }
-
- assert(otherOp != nullptr);
- if (otherOp->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, otherOp);
- }
- else if (otherOp->isMemoryOp() && ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)))
- {
- MakeSrcContained(tree, otherOp);
- }
- else
- {
- // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
- // contained, we can mark it reg-optional.
- SetRegOptional(otherOp);
- }
-
- return;
- }
-
- // TODO-XArch-CQ: factor out cmp optimization in 'genCondSetFlags' to be used here
- // or in other backend.
-
- bool hasShortCast = false;
- if (CheckImmedAndMakeContained(tree, op2))
- {
- // If the types are the same, or if the constant is of the correct size,
-        // we can treat the memory operand as contained.
- bool op1CanBeContained = (genTypeSize(op1Type) == genTypeSize(op2Type));
-
- // Do we have a short compare against a constant in op2
- //
- if (varTypeIsSmall(op1Type))
- {
- GenTreeIntCon* con = op2->AsIntCon();
- ssize_t ival = con->gtIconVal;
-
- bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
- bool useTest = isEqualityCompare && (ival == 0);
-
- if (!useTest)
- {
- ssize_t lo = 0; // minimum imm value allowed for cmp reg,imm
- ssize_t hi = 0; // maximum imm value allowed for cmp reg,imm
- bool isUnsigned = false;
-
- switch (op1Type)
- {
- case TYP_BOOL:
- op1Type = TYP_UBYTE;
- __fallthrough;
- case TYP_UBYTE:
- lo = 0;
- hi = 0x7f;
- isUnsigned = true;
- break;
- case TYP_BYTE:
- lo = -0x80;
- hi = 0x7f;
- break;
- case TYP_CHAR:
- lo = 0;
- hi = 0x7fff;
- isUnsigned = true;
- break;
- case TYP_SHORT:
- lo = -0x8000;
- hi = 0x7fff;
- break;
- default:
- unreached();
- }
-
- if ((ival >= lo) && (ival <= hi))
- {
- // We can perform a small compare with the immediate 'ival'
- tree->gtFlags |= GTF_RELOP_SMALL;
- if (isUnsigned && !isEqualityCompare)
- {
- tree->gtFlags |= GTF_UNSIGNED;
- }
-                // We can treat the memory operand as "contained"
- op1CanBeContained = true;
- }
- }
- }
-
- if (op1CanBeContained)
- {
- if (op1->isMemoryOp())
- {
- MakeSrcContained(tree, op1);
- }
- else
- {
- bool op1IsMadeContained = false;
-
- // When op1 is a GT_AND we can often generate a single "test" instruction
- // instead of two instructions (an "and" instruction followed by a "cmp"/"test").
- //
-                // This instruction can only be used for equality or inequality comparisons,
-                // and we must have a compare against zero.
-                //
-                // If we have a positive test for a single bit we can reverse the condition and
-                // make the compare against zero.
- //
- // Example:
- // GT_EQ GT_NE
- // / \ / \
- // GT_AND GT_CNS (0x100) ==>> GT_AND GT_CNS (0)
- // / \ / \
- // andOp1 GT_CNS (0x100) andOp1 GT_CNS (0x100)
- //
- // We will mark the GT_AND node as contained if the tree is an equality compare with zero.
- // Additionally, when we do this we also allow for a contained memory operand for "andOp1".
- //
- bool isEqualityCompare = (tree->gtOper == GT_EQ || tree->gtOper == GT_NE);
-
- if (isEqualityCompare && (op1->OperGet() == GT_AND))
- {
- GenTreePtr andOp2 = op1->gtOp.gtOp2;
- if (IsContainableImmed(op1, andOp2))
- {
- ssize_t andOp2CnsVal = andOp2->AsIntConCommon()->IconValue();
- ssize_t relOp2CnsVal = op2->AsIntConCommon()->IconValue();
-
- if ((relOp2CnsVal == andOp2CnsVal) && isPow2(andOp2CnsVal))
- {
- // We have a single bit test, so now we can change the
- // tree into the alternative form,
- // so that we can generate a test instruction.
-
- // Reverse the equality comparison
- tree->SetOperRaw((tree->gtOper == GT_EQ) ? GT_NE : GT_EQ);
-
- // Change the relOp2CnsVal to zero
- relOp2CnsVal = 0;
- op2->AsIntConCommon()->SetIconValue(0);
- }
-
-                        // Now, do we have an equality compare with zero?
- //
- if (relOp2CnsVal == 0)
- {
- // Note that child nodes must be made contained before parent nodes
-
- // Check for a memory operand for op1 with the test instruction
- //
- GenTreePtr andOp1 = op1->gtOp.gtOp1;
- if (andOp1->isMemoryOp())
- {
-                            // If the type of the memory operand (andOp1) is not the same as the type of the
-                            // constant (andOp2), check whether it is safe to mark andOp1 as contained. For
-                            // example, it is not safe to mark andOp1 as contained when andOp1 is a signed
-                            // byte and andOp2 is an int constant of value 512.
- //
- // If it is safe, we update the type and value of andOp2 to match with andOp1.
- bool containable = (andOp1->TypeGet() == op1->TypeGet());
- if (!containable)
- {
- ssize_t newIconVal = 0;
-
- switch (andOp1->TypeGet())
- {
- default:
- break;
- case TYP_BYTE:
- newIconVal = (signed char)andOp2CnsVal;
- containable = FitsIn<signed char>(andOp2CnsVal);
- break;
- case TYP_BOOL:
- case TYP_UBYTE:
- newIconVal = andOp2CnsVal & 0xFF;
- containable = true;
- break;
- case TYP_SHORT:
- newIconVal = (signed short)andOp2CnsVal;
- containable = FitsIn<signed short>(andOp2CnsVal);
- break;
- case TYP_CHAR:
- newIconVal = andOp2CnsVal & 0xFFFF;
- containable = true;
- break;
- case TYP_INT:
- newIconVal = (INT32)andOp2CnsVal;
- containable = FitsIn<INT32>(andOp2CnsVal);
- break;
- case TYP_UINT:
- newIconVal = andOp2CnsVal & 0xFFFFFFFF;
- containable = true;
- break;
-
-#ifdef _TARGET_64BIT_
- case TYP_LONG:
- newIconVal = (INT64)andOp2CnsVal;
- containable = true;
- break;
- case TYP_ULONG:
- newIconVal = (UINT64)andOp2CnsVal;
- containable = true;
- break;
-#endif //_TARGET_64BIT_
- }
-
- if (containable)
- {
- andOp2->gtType = andOp1->TypeGet();
- andOp2->AsIntConCommon()->SetIconValue(newIconVal);
- }
- }
-
- // Mark the 'andOp1' memory operand as contained
- // Note that for equality comparisons we don't need
- // to deal with any signed or unsigned issues.
- if (containable)
- {
- MakeSrcContained(op1, andOp1);
- }
- }
- // Mark the 'op1' (the GT_AND) operand as contained
- MakeSrcContained(tree, op1);
- op1IsMadeContained = true;
-
- // During Codegen we will now generate "test andOp1, andOp2CnsVal"
- }
- }
- }
- else if (op1->OperGet() == GT_CAST)
- {
-                    // If op1 is a cast operation and the cast type is a one-byte unsigned type,
-                    // we can use the value in the register directly, instead of doing an extra cast step.
- var_types dstType = op1->CastToType();
- bool isUnsignedDst = varTypeIsUnsigned(dstType);
- emitAttr castSize = EA_ATTR(genTypeSize(dstType));
- GenTreePtr castOp1 = op1->gtOp.gtOp1;
- genTreeOps castOp1Oper = castOp1->OperGet();
- bool safeOper = false;
-
- // It is not always safe to change the gtType of 'castOp1' to TYP_UBYTE.
-                    // For example, when 'castOp1Oper' is a GT_RSZ or GT_RSH, we are shifting
-                    // bits from the left into the lower bits. If we changed the type to TYP_UBYTE
-                    // we would instead generate a byte-sized shift operation: shr al, 24
-                    // For the following ALU operations it is safe to change the gtType to the
- // smaller type:
- //
- if ((castOp1Oper == GT_CNS_INT) || (castOp1Oper == GT_CALL) || // the return value from a Call
- (castOp1Oper == GT_LCL_VAR) || castOp1->OperIsLogical() || // GT_AND, GT_OR, GT_XOR
- castOp1->isMemoryOp()) // isIndir() || isLclField();
- {
- safeOper = true;
- }
-
- if ((castSize == EA_1BYTE) && isUnsignedDst && // Unsigned cast to TYP_UBYTE
- safeOper && // Must be a safe operation
- !op1->gtOverflow()) // Must not be an overflow checking cast
- {
- // Currently all of the Oper accepted as 'safeOper' are
- // non-overflow checking operations. If we were to add
- // an overflow checking operation then this assert needs
- // to be moved above to guard entry to this block.
- //
- assert(!castOp1->gtOverflowEx()); // Must not be an overflow checking operation
-
- // TODO-Cleanup: we're within "if (CheckImmedAndMakeContained(tree, op2))", so isn't
- // the following condition always true?
- if (op2->isContainedIntOrIImmed())
- {
- ssize_t val = (ssize_t)op2->AsIntConCommon()->IconValue();
- if (val >= 0 && val <= 255)
- {
- GenTreePtr removeTreeNode = op1;
- tree->gtOp.gtOp1 = castOp1;
- op1 = castOp1;
- castOp1->gtType = TYP_UBYTE;
-
- // trim down the value if castOp1 is an int constant since its type changed to UBYTE.
- if (castOp1Oper == GT_CNS_INT)
- {
- castOp1->gtIntCon.gtIconVal = (UINT8)castOp1->gtIntCon.gtIconVal;
- }
-
- op2->gtType = TYP_UBYTE;
- tree->gtFlags |= GTF_UNSIGNED;
-
-                            // Right now op1's type is the same as op2's type.
-                            // If op1 is a memory op, we should mark it as contained.
- if (castOp1->isMemoryOp())
- {
- MakeSrcContained(tree, op1);
- op1IsMadeContained = true;
- }
-
- BlockRange().Remove(removeTreeNode);
-
- // We've changed the type on op1 to TYP_UBYTE, but we already processed that node.
- // We need to go back and mark it byteable.
- // TODO-Cleanup: it might be better to move this out of the TreeNodeInfoInit pass to
- // the earlier "lower" pass, in which case the byteable check would just fall out.
- // But that is quite complex!
- TreeNodeInfoInitCheckByteable(op1);
-
-#ifdef DEBUG
- if (comp->verbose)
- {
- printf("TreeNodeInfoInitCmp: Removing a GT_CAST to TYP_UBYTE and changing "
- "castOp1->gtType to TYP_UBYTE\n");
- comp->gtDispTreeRange(BlockRange(), tree);
- }
-#endif
- }
- }
- }
- }
-
- // If not made contained, op1 can be marked as reg-optional.
- if (!op1IsMadeContained)
- {
- SetRegOptional(op1);
-
-                // If op1 codegen sets the ZF and SF flags and the compare is ==/!=
-                // against zero, we don't need to generate a test instruction,
- // provided we don't have another GenTree node between op1
- // and tree that could potentially modify flags.
- //
- // TODO-CQ: right now the below peep is inexpensive and
-                // gets the benefit in most cases because in the majority
-                // of cases op1, op2 and tree would be in that order in
- // execution. In general we should be able to check that all
- // the nodes that come after op1 in execution order do not
- // modify the flags so that it is safe to avoid generating a
- // test instruction. Such a check requires that on each
- // GenTree node we need to set the info whether its codegen
- // will modify flags.
- //
- // TODO-CQ: We can optimize compare against zero in the
- // following cases by generating the branch as indicated
- // against each case.
- // 1) unsigned compare
- // < 0 - always FALSE
- // <= 0 - ZF=1 and jne
- // > 0 - ZF=0 and je
- // >= 0 - always TRUE
- //
- // 2) signed compare
- // < 0 - SF=1 and js
- // >= 0 - SF=0 and jns
- if (isEqualityCompare && op1->gtSetZSFlags() && op2->IsIntegralConst(0) && (op1->gtNext == op2) &&
- (op2->gtNext == tree))
- {
- // Require codegen of op1 to set the flags.
- assert(!op1->gtSetFlags());
- op1->gtFlags |= GTF_SET_FLAGS;
- }
- }
- }
- }
- }
- else if (op1Type == op2Type)
- {
- if (op2->isMemoryOp())
- {
- MakeSrcContained(tree, op2);
- }
- else if (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))
- {
- MakeSrcContained(tree, op1);
- }
- else if (op1->IsCnsIntOrI())
- {
- // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm,
- // but there is currently an assert in CodeGen::genCompareInt().
- // https://github.com/dotnet/coreclr/issues/7270
- SetRegOptional(op2);
- }
- else
- {
- // One of op1 or op2 could be marked as reg optional
- // to indicate that codegen can still generate code
- // if one of them is on stack.
- SetRegOptional(PreferredRegOptionalOperand(tree));
- }
-
- if (varTypeIsSmall(op1Type) && varTypeIsUnsigned(op1Type))
- {
- // Mark the tree as doing unsigned comparison if
- // both the operands are small and unsigned types.
- // Otherwise we will end up performing a signed comparison
-            // of two small unsigned values without zero extending them to
-            // TYP_INT size, which is incorrect.
- tree->gtFlags |= GTF_UNSIGNED;
- }
- }
-}
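
The GT_AND rewrite described in the function above has a simple scalar reading: a positive single-bit test (x & 0x100) == 0x100 is equivalent to (x & 0x100) != 0, which lets codegen emit one test instruction instead of an and followed by a cmp. A sketch of the equivalence, with 0x100 as an arbitrary example bit:

    #include <cassert>

    // Both forms test the same bit; the second maps directly onto
    //     test x, 0x100
    //     jne  taken
    bool BitSetCmpForm(int x)  { return (x & 0x100) == 0x100; }
    bool BitSetTestForm(int x) { return (x & 0x100) != 0; }

    int main()
    {
        for (int x : {0, 0x100, 0xFF, 0x1FF, -1})
        {
            assert(BitSetCmpForm(x) == BitSetTestForm(x));
        }
        return 0;
    }
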
-
/* Lower GT_CAST(srcType, DstType) nodes.
*
* Casts from small int type to float/double are transformed as follows:
@@ -4236,312 +950,6 @@ bool Lowering::IsRMWMemOpRootedAtStoreInd(GenTreePtr tree, GenTreePtr* outIndirC
return true;
}
-//--------------------------------------------------------------------------------------------
-// SetStoreIndOpCountsIfRMWMemOp checks to see if there is a RMW memory operation rooted at
-// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
-// that CodeGen will generate a single instruction of the form:
-//
-// binOp [addressing mode], reg
-//
-// Parameters
-// storeInd - GT_STOREIND node
-//
-// Return value
-// True, if RMW memory op tree pattern is recognized and op counts are set.
-// False otherwise.
-//
-bool Lowering::SetStoreIndOpCountsIfRMWMemOp(GenTreePtr storeInd)
-{
- assert(storeInd->OperGet() == GT_STOREIND);
-
- // SSE2 doesn't support RMW on float values
- assert(!varTypeIsFloating(storeInd));
-
- // Terminology:
- // indirDst = memory write of an addr mode (i.e. storeind destination)
-    //   indirSrc = value being written to memory (i.e. storeind source which could be a binary/unary op)
- // indirCandidate = memory read i.e. a gtInd of an addr mode
- // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
-
- GenTreePtr indirCandidate = nullptr;
- GenTreePtr indirOpSource = nullptr;
-
- if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
- {
- JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
- storeInd->AsStoreInd()->GetRMWStatus());
- DISPTREERANGE(BlockRange(), storeInd);
- return false;
- }
-
- GenTreePtr indirDst = storeInd->gtGetOp1();
- GenTreePtr indirSrc = storeInd->gtGetOp2();
- genTreeOps oper = indirSrc->OperGet();
-
- // At this point we have successfully detected a RMW memory op of one of the following forms
- // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
- // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate) in case of commutative operations OR
- // storeInd(indirDst, indirSrc(indirCandidate) in case of unary operations
- //
- // Here indirSrc = one of the supported binary or unary operation for RMW of memory
- // indirCandidate = a GT_IND node
- // indirCandidateChild = operand of GT_IND indirCandidate
- //
- // The logic below essentially does the following
- // set storeInd src count to that of the dst count of indirOpSource
- // clear operand counts on indirSrc (i.e. marked as contained and storeInd will generate code for it)
- // clear operand counts on indirCandidate
- // clear operand counts on indirDst except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
- // base
- // Increment src count of storeInd to account for the registers required to form indirDst addr mode
- // clear operand counts on indirCandidateChild
-
- TreeNodeInfo* info = &(storeInd->gtLsraInfo);
- info->dstCount = 0;
-
- if (GenTree::OperIsBinary(oper))
- {
- // On Xarch RMW operations require that the source memory-op be in a register.
- assert(!indirOpSource->isMemoryOp() || indirOpSource->gtLsraInfo.dstCount == 1);
-        JITDUMP("Lower successfully detected an assignment of the form: *addrMode BinOp= source\n");
- info->srcCount = indirOpSource->gtLsraInfo.dstCount;
- }
- else
- {
- assert(GenTree::OperIsUnary(oper));
-        JITDUMP("Lower successfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
- info->srcCount = 0;
- }
- DISPTREERANGE(BlockRange(), storeInd);
-
- m_lsra->clearOperandCounts(indirSrc);
- m_lsra->clearOperandCounts(indirCandidate);
-
- GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
- if (indirCandidateChild->OperGet() == GT_LEA)
- {
- GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
-
- if (addrMode->HasBase())
- {
- assert(addrMode->Base()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Base());
- info->srcCount++;
- }
-
- if (addrMode->HasIndex())
- {
- assert(addrMode->Index()->OperIsLeaf());
- m_lsra->clearOperandCounts(addrMode->Index());
- info->srcCount++;
- }
-
- m_lsra->clearOperandCounts(indirDst);
- }
- else
- {
- assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
- indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
-
- // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
- // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
- // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
-        // time.
- if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
- {
- m_lsra->clearOperandCounts(indirDst);
- }
- else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
- {
- m_lsra->clearOperandCounts(indirDst);
- }
- else
- {
- // Need a reg and hence increment src count of storeind
- info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
- }
- }
- m_lsra->clearOperandCounts(indirCandidateChild);
-
-#ifdef _TARGET_X86_
- if (varTypeIsByte(storeInd))
- {
-        // If storeInd is of TYP_BYTE, restrict indirOpSource's candidates to byteable registers.
- bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0;
- if (!containedNode)
- {
- regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra);
- assert(regMask != RBM_NONE);
- indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS);
- }
- }
-#endif
-
- return true;
-}
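
The pattern this function recognizes is the classic read-modify-write store: the load, the arithmetic and the store all target the same address, so codegen can fold them into a single instruction such as add dword ptr [rax], ecx. A scalar sketch of a store that qualifies (illustration only):

    // In IR terms: STOREIND(addr, ADD(IND(addr), value)) -- the same 'addr' on both sides.
    void RmwAdd(int* addr, int value)
    {
        *addr = *addr + value; // eligible for: add dword ptr [addr], value
    }
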
-
-/**
- * Takes care of annotating the src and dst register
- * requirements for a GT_MUL treenode.
- */
-void Lowering::SetMulOpCounts(GenTreePtr tree)
-{
-#if defined(_TARGET_X86_)
- assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG);
-#else
- assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
-#endif
- TreeNodeInfo* info = &(tree->gtLsraInfo);
-
- info->srcCount = 2;
- info->dstCount = 1;
-
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtOp.gtOp2;
-
- // Case of float/double mul.
- if (varTypeIsFloating(tree->TypeGet()))
- {
- assert(tree->OperGet() == GT_MUL);
-
- if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
- {
- MakeSrcContained(tree, op2);
- }
- else if (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1)))
- {
-        // Since GT_MUL is commutative, we will try to re-order operands if it is safe to
-        // generate a more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
- MakeSrcContained(tree, op1);
- }
- else
- {
- // If there are no containable operands, we can make an operand reg optional.
- SetRegOptionalForBinOp(tree);
- }
- return;
- }
-
- bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
- bool requiresOverflowCheck = tree->gtOverflowEx();
- bool useLeaEncoding = false;
- GenTreePtr memOp = nullptr;
-
- bool hasImpliedFirstOperand = false;
- GenTreeIntConCommon* imm = nullptr;
- GenTreePtr other = nullptr;
-
-// There are three forms of x86 multiply:
-// one-op form: RDX:RAX = RAX * r/m
-// two-op form: reg *= r/m
-// three-op form: reg = r/m * imm
-
-// This special widening 32x32->64 MUL is not used on x64
-#if defined(_TARGET_X86_)
- if (tree->OperGet() != GT_MUL_LONG)
-#endif
- {
- assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
- }
-
- // Multiply should never be using small types
- assert(!varTypeIsSmall(tree->TypeGet()));
-
- // We do use the widening multiply to implement
- // the overflow checking for unsigned multiply
- //
- if (isUnsignedMultiply && requiresOverflowCheck)
- {
- // The only encoding provided is RDX:RAX = RAX * rm
- //
- // Here we set RAX as the only destination candidate
- // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
- //
- info->setDstCandidates(m_lsra, RBM_RAX);
- hasImpliedFirstOperand = true;
- }
- else if (tree->OperGet() == GT_MULHI)
- {
-        // Have to use the encoding RDX:RAX = RAX * rm. Since we only care about the
- // upper 32 bits of the result set the destination candidate to REG_RDX.
- info->setDstCandidates(m_lsra, RBM_RDX);
- hasImpliedFirstOperand = true;
- }
-#if defined(_TARGET_X86_)
- else if (tree->OperGet() == GT_MUL_LONG)
- {
-        // have to use the encoding RDX:RAX = RAX * rm
- info->setDstCandidates(m_lsra, RBM_RAX);
- hasImpliedFirstOperand = true;
- }
-#endif
- else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
- {
- if (IsContainableImmed(tree, op2))
- {
- imm = op2->AsIntConCommon();
- other = op1;
- }
- else
- {
- imm = op1->AsIntConCommon();
- other = op2;
- }
-
- // CQ: We want to rewrite this into a LEA
- ssize_t immVal = imm->AsIntConCommon()->IconValue();
- if (!requiresOverflowCheck && (immVal == 3 || immVal == 5 || immVal == 9))
- {
- useLeaEncoding = true;
- }
-
- MakeSrcContained(tree, imm); // The imm is always contained
- if (other->isMemoryOp())
- {
- memOp = other; // memOp may be contained below
- }
- }
-
- // We allow one operand to be a contained memory operand.
- // The memory op type must match with the 'tree' type.
- // This is because during codegen we use 'tree' type to derive EmitTypeSize.
-    // E.g. op1 type = byte, op2 type = byte but GT_MUL tree type is int.
- //
- if (memOp == nullptr && op2->isMemoryOp())
- {
- memOp = op2;
- }
-
- // To generate an LEA we need to force memOp into a register
- // so don't allow memOp to be 'contained'
- //
- if (!useLeaEncoding)
- {
- if ((memOp != nullptr) && (memOp->TypeGet() == tree->TypeGet()) && IsSafeToContainMem(tree, memOp))
- {
- MakeSrcContained(tree, memOp);
- }
- else if (imm != nullptr)
- {
- // Has a contained immediate operand.
-            // Only the 'other' operand can be marked as reg optional.
- assert(other != nullptr);
- SetRegOptional(other);
- }
- else if (hasImpliedFirstOperand)
- {
-            // Only op2 can be marked as reg optional.
- SetRegOptional(op2);
- }
- else
- {
- // If there are no containable operands, we can make either of op1 or op2
- // as reg optional.
- SetRegOptionalForBinOp(tree);
- }
- }
-}
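
The "CQ: rewrite into a LEA" note above refers to the usual strength reduction: multiplying by 3, 5 or 9 fits the [reg + reg*scale] addressing form (scales of 2, 4 and 8), so no mul instruction is needed. A sketch of the equivalence:

    #include <cassert>

    // x*5 can be encoded as lea r, [x + x*4]; similarly 3 -> x + x*2 and 9 -> x + x*8.
    int MulBy5(int x) { return x + x * 4; }

    int main()
    {
        for (int x : {0, 1, 7, -3, 1000})
        {
            assert(MulBy5(x) == x * 5);
        }
        return 0;
    }
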
-
//------------------------------------------------------------------------------
// isRMWRegOper: Can this binary tree node be used in a Read-Modify-Write format
//
@@ -4732,71 +1140,6 @@ GenTree* Lowering::PreferredRegOptionalOperand(GenTree* tree)
return preferredOp;
}
-#ifdef _TARGET_X86_
-//------------------------------------------------------------------------
-// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable registers for
-// various reasons
-//
-// Arguments:
-// tree - The node of interest
-//
-// Return Value:
-// If we need to exclude non-byteable registers
-//
-bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
-{
- // Example1: GT_STOREIND(byte, addr, op2) - storeind of byte sized value from op2 into mem 'addr'
-    // Storeind itself will not produce any value and hence dstCount=0. But op2 could be a TYP_INT
- // value. In this case we need to exclude esi/edi from the src candidates of op2.
- if (varTypeIsByte(tree))
- {
- return true;
- }
- // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
- else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType()))
- {
- return true;
- }
- else if (tree->OperIsCompare())
- {
- GenTree* op1 = tree->gtGetOp1();
- GenTree* op2 = tree->gtGetOp2();
-
- // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size. Here is an example of generated code:
- // cmp dl, byte ptr[addr mode]
- // movzx edx, dl
- if (varTypeIsByte(op1) && varTypeIsByte(op2))
- {
- return true;
- }
- // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
- // ubyte as the result of the comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size.
- else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
- {
- return true;
- }
-    // Example5: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
- // ubyte as the result of the comparison and if the result needs to be materialized into a reg
- // simply zero extend it to TYP_INT size.
- else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
- {
- return true;
- }
- else
- {
- return false;
- }
- }
- else
- {
- return false;
- }
-}
-#endif // _TARGET_X86_
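
The x86-only restriction above exists because byte-sized operands and setcc can only address the low byte of eax/ebx/ecx/edx (al..dl); esi and edi have no byte form. The pattern the examples describe boils down to roughly the following, where the byte comparison result is materialized through a byteable register and then zero-extended:

    #include <cstdint>

    // On x86 this compiles to roughly:
    //     cmp   cl, dl        ; byte compare
    //     sete  al            ; needs a byteable register
    //     movzx eax, al       ; zero-extend the byte result to 32 bits
    int ByteEquals(uint8_t a, uint8_t b)
    {
        return a == b;
    }
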
-
#endif // _TARGET_XARCH_
#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsra.cpp b/src/jit/lsra.cpp
index accfd6ee78..ac76e29364 100644
--- a/src/jit/lsra.cpp
+++ b/src/jit/lsra.cpp
@@ -1914,6 +1914,14 @@ bool LinearScan::isRegCandidate(LclVarDsc* varDsc)
void LinearScan::identifyCandidates()
{
+
+ // Initialize the sets of lclVars that are used to determine whether, and for which lclVars,
+ // we need to perform resolution across basic blocks.
+ // Note that we can't do this in the constructor because the number of tracked lclVars may
+ // change between the constructor and the actual allocation.
+ VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler));
+ VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler));
+
if (compiler->lvaCount == 0)
{
return;
@@ -1950,8 +1958,6 @@ void LinearScan::identifyCandidates()
// for vectors on Arm64, though the actual value may differ.
VarSetOps::AssignNoCopy(compiler, fpCalleeSaveCandidateVars, VarSetOps::MakeEmpty(compiler));
- VarSetOps::AssignNoCopy(compiler, resolutionCandidateVars, VarSetOps::MakeEmpty(compiler));
- VarSetOps::AssignNoCopy(compiler, splitOrSpilledVars, VarSetOps::MakeEmpty(compiler));
VARSET_TP VARSET_INIT_NOCOPY(fpMaybeCandidateVars, VarSetOps::MakeEmpty(compiler));
unsigned int floatVarCount = 0;
unsigned int thresholdFPRefCntWtd = 4 * BB_UNITY_WEIGHT;
@@ -3411,7 +3417,7 @@ static int ComputeOperandDstCount(GenTree* operand)
// ComputeAvailableSrcCount: computes the number of registers available as
// sources for a node.
//
-// This is simply the sum of the number of registers prduced by each
+// This is simply the sum of the number of registers produced by each
// operand to the node.
//
// Arguments:
@@ -3430,7 +3436,7 @@ static int ComputeAvailableSrcCount(GenTree* node)
return numSources;
}
-#endif
+#endif // DEBUG
void LinearScan::buildRefPositionsForNode(GenTree* tree,
BasicBlock* block,
@@ -10412,11 +10418,21 @@ void LinearScan::lsraDispNode(GenTreePtr tree, LsraTupleDumpMode mode, bool hasD
}
if (!hasDest && tree->gtHasReg())
{
- // This can be true for the "localDefUse" case - defining a reg, but
- // pushing it on the stack
- assert(spillChar == ' ');
- spillChar = '*';
- hasDest = true;
+ // A node can define a register, but not produce a value for a parent to consume,
+ // i.e. in the "localDefUse" case.
+ // There used to be an assert here that we wouldn't spill such a node.
+ // However, we can have unused lclVars that wind up being the node at which
+        // it is spilled. This probably indicates a bug, but we don't really want to
+ // assert during a dump.
+ if (spillChar == 'S')
+ {
+ spillChar = '$';
+ }
+ else
+ {
+ spillChar = '*';
+ }
+ hasDest = true;
}
}
printf("%c N%03u. ", spillChar, tree->gtSeqNum);
diff --git a/src/jit/lsraarm.cpp b/src/jit/lsraarm.cpp
new file mode 100644
index 0000000000..57f0096b35
--- /dev/null
+++ b/src/jit/lsraarm.cpp
@@ -0,0 +1,1073 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register Requirements for ARM XX
+XX XX
+XX This encapsulates all the logic for setting register requirements for XX
+XX the ARM architecture. XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+#include "lsra.h"
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitStoreLoc: Lower a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Handling of contained immediates and widening operations of unsigneds.
+//
+void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+ CheckImmedAndMakeContained(storeLoc, op1);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCmp: Lower a GT comparison node.
+//
+// Arguments:
+// tree - the node to lower
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitGCWriteBarrier: GC lowering helper.
+//
+// Arguments:
+// tree - the node to lower
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+{
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ short leaSrcCount = 0;
+ if (lea->Base() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ if (lea->Index() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+#if NOGC_WRITE_BARRIERS
+ NYI_ARM("NOGC_WRITE_BARRIERS");
+#else
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+#endif // NOGC_WRITE_BARRIERS
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitIndir: Specify register requirements for address expression
+// of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND, GT_STOREIND, block node or GT_NULLCHECK gentree node
+//
+void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
+{
+ assert(indirTree->OperIsIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned cns = 0;
+ unsigned mul;
+ bool rev;
+ bool modifiedSources = false;
+
+ if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+ cns = lea->gtOffset;
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register to which the address is
+ // computed
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+ if (varTypeIsStruct(tree))
+ {
+ NYI_ARM("struct return");
+ }
+ else
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ info->srcCount++;
+        // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr
+        // is computed into a register.
+ if (call->IsFastTailCall())
+ {
+ NYI_ARM("tail call");
+ }
+ }
+ else
+ {
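+        // Note (illustrative): a direct call does not consume a source for its target,
+        // but an internal register is reserved here, e.g. so that the call address can
+        // be materialized when it cannot be reached with a direct branch.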
+ info->internalIntCount = 1;
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+ info->setDstCandidates(l, RBM_FLOATRET);
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+ // First, count reg args
+ bool callHasFloatRegArgs = false;
+
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
+ continue;
+ }
+
+ var_types argType = argNode->TypeGet();
+ bool argIsFloat = varTypeIsFloating(argType);
+ callHasFloatRegArgs |= argIsFloat;
+
+ regNumber argReg = curArgTabEntry->regNum;
+        // We will set up argMask to the set of all registers that compose this argument
+ regMaskTP argMask = 0;
+
+ argNode = argNode->gtEffectiveVal();
+
+ // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
+ {
+ GenTreePtr actualArgNode = argNode;
+ unsigned originalSize = 0;
+
+ if (argNode->gtOper == GT_FIELD_LIST)
+ {
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
+
+                // Initialize the first register and the first regmask in our list
+ regNumber targetReg = argReg;
+ regMaskTP targetMask = genRegMask(targetReg);
+ unsigned iterationNum = 0;
+ originalSize = 0;
+
+ for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = fieldListPtr->Current();
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
+
+                    originalSize += REGSIZE_BYTES; // one register-sized slot per field
+
+ // Record the register requirements for the GT_PUTARG_REG node
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // We consume one source for each item in this list
+ info->srcCount++;
+ iterationNum++;
+
+ // Update targetReg and targetMask for the next putarg_reg (if any)
+ targetReg = genRegArgNext(targetReg);
+ targetMask = genRegMask(targetReg);
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ compiler->gtDispTreeRange(BlockRange(), argNode);
+#endif
+ noway_assert(!"Unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
+ regNumber curReg = argReg;
+ regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
+ unsigned remainingSlots = slots;
+
+ while (remainingSlots > 0)
+ {
+ argMask |= genRegMask(curReg);
+ remainingSlots--;
+
+ if (curReg == lastReg)
+ break;
+
+ curReg = genRegArgNext(curReg);
+ }
+
+ // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
+ noway_assert(remainingSlots == 0);
+ argNode->gtLsraInfo.internalIntCount = 0;
+ }
+ else // A scalar argument (not a struct)
+ {
+ // We consume one source
+ info->srcCount++;
+
+ argMask |= genRegMask(argReg);
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+ }
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ NYI_ARM("float reg varargs");
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+ // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
+ {
+ // We will use store instructions that each write a register sized value
+
+ if (putArgChild->OperGet() == GT_FIELD_LIST)
+ {
+ // We consume all of the items in the GT_FIELD_LIST
+ argNode->gtLsraInfo.srcCount = info->numSlots;
+ }
+ else
+ {
+ // We could use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+            // We will generate all of the code for the GT_PUTARG_STK and its child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+ }
+ else
+ {
+ // We must not have a multi-reg struct
+ assert(info->numSlots == 1);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInit: Set the register requirements for RA.
+//
+// Notes:
+// Takes care of annotating the register requirements
+// for every TreeNodeInfo struct that maps to each tree node.
+//
+// Preconditions:
+// LSRA has been initialized and there is a TreeNodeInfo node
+// already allocated and initialized for every tree in the IR.
+//
+// Postconditions:
+// Every TreeNodeInfo instance has the right annotations on register
+// requirements needed by LSRA to build the Interval Table (source,
+// destination and internal [temp] register counts).
+//
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ unsigned kind = tree->OperKind();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ RegisterType registerType = TypeGet(tree);
+
+ JITDUMP("TreeNodeInfoInit for: ");
+ DISPNODE(tree);
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_NOP:
+            // A GT_NOP is a passthrough if it is void or if it has a child, but it must
+            // be considered to produce a dummy value if it has a type but no child.
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ {
+            // TODO-ARM: Implement other types of intrinsics (round, sqrt, etc.)
+ // Both operand and its result must be of the same floating point type.
+ op1 = tree->gtOp.gtOp1;
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Abs:
+ case CORINFO_INTRINSIC_Sqrt:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+ default:
+ NYI_ARM("Lowering::TreeNodeInfoInit for GT_INTRINSIC");
+ break;
+ }
+ }
+ break;
+
+ case GT_CAST:
+ {
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions
+ // and that allow the source operand to be either a reg or memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+#ifdef DEBUG
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+ }
+#endif // DEBUG
+
+ if (tree->gtOverflow())
+ {
+ NYI_ARM("overflow checks");
+ }
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // Check and make op2 contained (if it is a containable immediate)
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_MUL:
+ if (tree->gtOverflow())
+ {
+ // Need a register different from target reg to check for overflow.
+ info->internalIntCount = 2;
+ }
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_MULHI:
+ case GT_UDIV:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_LIST:
+ case GT_FIELD_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
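+            // The constant's bit pattern is built in integer register(s) and then moved
+            // to a floating-point register, hence the internal int registers requested below.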
+ if (tree->TypeGet() == TYP_FLOAT)
+ {
+ // An int register for float constant
+ info->internalIntCount = 1;
+ }
+ else
+ {
+ // TYP_DOUBLE
+ assert(tree->TypeGet() == TYP_DOUBLE);
+
+ // Two int registers for double constant
+ info->internalIntCount = 2;
+ }
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned cns = lea->gtOffset;
+
+ // This LEA is instantiating an address,
+ // so we set up the srcCount and dstCount here.
+ info->srcCount = 0;
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+ if (index != nullptr)
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+
+ if ((index != nullptr) && (cns != 0))
+ {
+ NYI_ARM("GT_LEA: index and cns are not nil");
+ }
+ else if (!emitter::emitIns_valid_imm_for_add(cns, INS_FLAGS_DONT_CARE))
+ {
+ NYI_ARM("GT_LEA: invalid imm");
+ }
+ }
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+ if (shiftBy->IsCnsIntOrI())
+ {
+ l->clearDstCount(shiftBy);
+ info->srcCount--;
+ }
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ TreeNodeInfoInitCmp(tree);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ TreeNodeInfoInitGCWriteBarrier(tree);
+ break;
+ }
+
+ TreeNodeInfoInitIndir(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ // null check is an indirection on an addr
+ TreeNodeInfoInitIndir(tree);
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ TreeNodeInfoInitIndir(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+
+ default:
+#ifdef DEBUG
+ JitTls::GetCompiler()->gtDispTree(tree);
+#endif
+ NYI_ARM("TreeNodeInfoInit default case");
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ case GT_LCL_VAR_ADDR:
+ case GT_CLS_VAR_ADDR:
+ case GT_IL_OFFSET:
+ case GT_CNS_INT:
+ case GT_PUTARG_REG:
+ case GT_PUTARG_STK:
+ info->dstCount = tree->IsValue() ? 1 : 0;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2IfPresent() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+ } // end switch (tree->OperGet())
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || tree->IsMultiRegCall());
+}
+
+#endif // _TARGET_ARM_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraarm64.cpp b/src/jit/lsraarm64.cpp
new file mode 100644
index 0000000000..0db30e1811
--- /dev/null
+++ b/src/jit/lsraarm64.cpp
@@ -0,0 +1,1766 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register Requirements for ARM64 XX
+XX XX
+XX This encapsulates all the logic for setting register requirements for XX
+XX the ARM64 architecture. XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_ARM64_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Handling of contained immediates.
+
+void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
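+        // (For illustration: a 16-byte struct returned in x0/x1 gives a count of 2.)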
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+ CheckImmedAndMakeContained(storeLoc, op1);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInit: Set the register requirements for RA.
+//
+// Notes:
+// Takes care of annotating the register requirements
+// for every TreeNodeInfo struct that maps to each tree node.
+//
+// Preconditions:
+// LSRA has been initialized and there is a TreeNodeInfo node
+// already allocated and initialized for every tree in the IR.
+//
+// Postconditions:
+// Every TreeNodeInfo instance has the right annotations on register
+// requirements needed by LSRA to build the Interval Table (source,
+// destination and internal [temp] register counts).
+//
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ unsigned kind = tree->OperKind();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ RegisterType registerType = TypeGet(tree);
+
+ JITDUMP("TreeNodeInfoInit for: ");
+ DISPNODE(tree);
+ JITDUMP("\n");
+
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ info->dstCount = tree->IsValue() ? 1 : 0;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2IfPresent() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ LowerStoreLoc(tree->AsLclVarCommon());
+ TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+
+ __fallthrough;
+
+ case GT_LIST:
+ case GT_FIELD_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ {
+ GenTreeDblCon* dblConst = tree->AsDblCon();
+ double constValue = dblConst->gtDblCon.gtDconVal;
+
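+            // Illustrative example: 2.0 can be encoded as an fmov immediate, so no extra
+            // register is needed, whereas 0.1 cannot and must be loaded from memory.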
+ if (emitter::emitIns_valid_imm_for_fmov(constValue))
+ {
+ // Directly encode constant to instructions.
+ }
+ else
+ {
+ // Reserve int to load constant from memory (IF_LARGELDC)
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+ case GT_NOP:
+            // A GT_NOP is a passthrough if it is void or if it has a child, but it must
+            // be considered to produce a dummy value if it has a type but no child.
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ l->clearDstCount(tree->gtOp.gtOp1);
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ADD:
+ case GT_SUB:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(tree->gtOp.gtOp1->TypeGet() == tree->gtOp.gtOp2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // Check and make op2 contained (if it is a containable immediate)
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_RETURNTRAP:
+ // this just turns into a compare of its child with an int
+ // + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_MOD:
+ case GT_UMOD:
+ NYI_IF(varTypeIsFloating(tree->TypeGet()), "FP Remainder in ARM64");
+ assert(!"Shouldn't see an integer typed GT_MOD node in ARM64");
+ break;
+
+ case GT_MUL:
+ if (tree->gtOverflow())
+ {
+ // Need a register different from target reg to check for overflow.
+ info->internalIntCount = 2;
+ }
+ __fallthrough;
+
+ case GT_DIV:
+ case GT_MULHI:
+ case GT_UDIV:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_INTRINSIC:
+ {
+ // TODO-ARM64-NYI
+ // Right now only Abs/Round/Sqrt are treated as math intrinsics
+ noway_assert((tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Round) ||
+ (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Sqrt));
+
+ // Both operand and its result must be of the same floating point type.
+ op1 = tree->gtOp.gtOp1;
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+ }
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ {
+ // TODO-ARM64-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned
+ // register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions
+ // and that allow the source operand to be either a reg or memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+#ifdef DEBUG
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+ }
+#endif // DEBUG
+ // Some overflow checks need a temp reg
+
+ CastInfo castInfo;
+
+ // Get information about the cast.
+ getCastDescription(tree, &castInfo);
+
+ if (castInfo.requiresOverflowCheck)
+ {
+ var_types srcType = castOp->TypeGet();
+ emitAttr cmpSize = EA_ATTR(genTypeSize(srcType));
+
+ // If we cannot store the comparisons in an immediate for either
+ // comparing against the max or min value, then we will need to
+ // reserve a temporary register.
+
+ bool canStoreMaxValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMax, cmpSize);
+ bool canStoreMinValue = emitter::emitIns_valid_imm_for_cmp(castInfo.typeMin, cmpSize);
+
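+                // Illustrative example: a checked cast from int to ubyte compares against
+                // 0 and 255, both of which encode as cmp immediates, so no temp is needed.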
+ if (!canStoreMaxValue || !canStoreMinValue)
+ {
+ info->internalIntCount = 1;
+ }
+ }
+ }
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROR:
+ {
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+ if (shiftBy->IsCnsIntOrI())
+ {
+ l->clearDstCount(shiftBy);
+ info->srcCount--;
+ }
+ }
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ TreeNodeInfoInitCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // TODO-ARM64-NYI
+ NYI("CMPXCHG");
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = 0;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+ case GT_BLK:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ LowerBlockStore(tree->AsBlk());
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_INIT_VAL:
+ // Always a passthrough of its child's value.
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_LCLHEAP:
+ {
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size? Init Memory? # temp regs
+ // 0 - 0
+ // const and <=6 ptr words - 0
+ // const and <PageSize No 0
+ // >6 ptr words Yes hasPspSym ? 1 : 0
+ // Non-const Yes hasPspSym ? 1 : 0
+ // Non-const No 2
+ //
+ // PSPSym - If the method has PSPSym increment internalIntCount by 1.
+ //
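+            // Illustrative example (assuming no PSPSym): a constant 128-byte localloc with
+            // compInitMem false stays below the page size, so no temp register is needed,
+            // while a non-constant size without init requires two temp registers.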
+ bool hasPspSym;
+#if FEATURE_EH_FUNCLETS
+ hasPspSym = (compiler->lvaPSPSym != BAD_VAR_NUM);
+#else
+ hasPspSym = false;
+#endif
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with
+ // localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+ size_t cntStackAlignedWidthItems = (sizeVal >> STACK_ALIGN_SHIFT);
+
+                    // For small allocations, up to 4 'stp' instructions (i.e. 64 bytes of localloc)
+ //
+ if (cntStackAlignedWidthItems <= 4)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+                        // More than 4 slots, and we need to zero-initialize the allocated stack space.
+                        // If the method has a PSPSym, we need an internal register to hold regCnt
+                        // since the targetReg allocated to the GT_LCLHEAP node could be the same as
+                        // one of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+                    // If the method has a PSPSym, we need an internal register to hold regCnt
+                    // since the targetReg allocated to the GT_LCLHEAP node could be the same as
+                    // one of the internal registers.
+ info->internalIntCount = hasPspSym ? 1 : 0;
+ }
+ }
+
+            // If the method has a PSPSym, we need an additional register to relocate it on the stack.
+ if (hasPspSym)
+ {
+ // Exclude const size 0
+ if (!size->IsCnsIntOrI() || (size->gtIntCon.gtIconVal > 0))
+ info->internalIntCount++;
+ }
+ }
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTree* intCns = nullptr;
+ GenTree* other = nullptr;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ intCns = node->gtIndex;
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ intCns = node->gtArrLen;
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtIndex;
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+            // We need one internal register when generating code for GT_ARR_INDEX; however, the
+            // register allocator may give us the same register that it gives us for 'dst', so as
+            // a workaround we just ask for two internal registers.
+ //
+ info->internalIntCount = 2;
+
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ break;
+
+ case GT_LEA:
+ {
+ GenTreeAddrMode* lea = tree->AsAddrMode();
+
+ GenTree* base = lea->Base();
+ GenTree* index = lea->Index();
+ unsigned cns = lea->gtOffset;
+
+ // This LEA is instantiating an address,
+ // so we set up the srcCount and dstCount here.
+ info->srcCount = 0;
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+ if (index != nullptr)
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+
+ // On ARM64 we may need a single internal register
+ // (when both conditions are true then we still only need a single internal register)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_add(cns, EA_8BYTE))
+ {
+ // This offset can't be contained in the add instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+ }
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ TreeNodeInfoInitGCWriteBarrier(tree);
+ break;
+ }
+ if (!varTypeIsFloating(src->TypeGet()) && src->IsIntegralConst(0))
+ {
+ // an integer zero for 'src' can be contained.
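+                // (ARM64 can store wzr/xzr directly, so no register is needed for the zero.)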
+ MakeSrcContained(tree, src);
+ }
+
+ TreeNodeInfoInitIndir(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ // null check is an indirection on an addr
+ TreeNodeInfoInitIndir(tree);
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ TreeNodeInfoInitIndir(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+ case GT_CLS_VAR:
+ info->srcCount = 0;
+ // GT_CLS_VAR, by the time we reach the backend, must always
+ // be a pure use.
+ // It will produce a result of the type of the
+ // node, and use an internal register for the address.
+
+ info->dstCount = 1;
+ assert((tree->gtFlags & (GTF_VAR_DEF | GTF_VAR_USEASG | GTF_VAR_USEDEF)) == 0);
+ info->internalIntCount = 1;
+ break;
+ } // end switch (tree->OperGet())
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || tree->IsMultiRegCall());
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if ((op1->OperGet() == GT_LCL_VAR) || (op1->OperGet() == GT_LCL_FLD))
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+ info->srcCount++;
+
+        // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr
+        // is computed into a register.
+ if (call->IsFastTailCall())
+ {
+ // Fast tail call - make sure that call target is always computed in IP0
+ // so that epilog sequence can generate "br xip0" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, genRegMask(REG_IP0));
+ }
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+ info->setDstCandidates(l, RBM_FLOATRET);
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+ // First, count reg args
+ bool callHasFloatRegArgs = false;
+
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ TreeNodeInfoInitPutArgStk(argNode->AsPutArgStk(), curArgTabEntry);
+ continue;
+ }
+
+ var_types argType = argNode->TypeGet();
+ bool argIsFloat = varTypeIsFloating(argType);
+ callHasFloatRegArgs |= argIsFloat;
+
+ regNumber argReg = curArgTabEntry->regNum;
+        // We will set up argMask to the set of all registers that compose this argument
+ regMaskTP argMask = 0;
+
+ argNode = argNode->gtEffectiveVal();
+
+ // A GT_FIELD_LIST has a TYP_VOID, but is used to represent a multireg struct
+ if (varTypeIsStruct(argNode) || (argNode->gtOper == GT_FIELD_LIST))
+ {
+ GenTreePtr actualArgNode = argNode;
+ unsigned originalSize = 0;
+
+ if (argNode->gtOper == GT_FIELD_LIST)
+ {
+ // There could be up to 2-4 PUTARG_REGs in the list (3 or 4 can only occur for HFAs)
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
+
+                // Initialize the first register and the first regmask in our list
+ regNumber targetReg = argReg;
+ regMaskTP targetMask = genRegMask(targetReg);
+ unsigned iterationNum = 0;
+ originalSize = 0;
+
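+                // For illustration: an HFA of three doubles passed starting in d1 constrains
+                // its three PUTARG_REG nodes to d1, d2 and d3 in turn, adding one source each.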
+ for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = fieldListPtr->Current();
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+ GenTreePtr putArgChild = putArgRegNode->gtOp.gtOp1;
+
+ originalSize += REGSIZE_BYTES; // 8 bytes
+
+ // Record the register requirements for the GT_PUTARG_REG node
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, targetMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, targetMask);
+
+ // We consume one source for each item in this list
+ info->srcCount++;
+ iterationNum++;
+
+ // Update targetReg and targetMask for the next putarg_reg (if any)
+ targetReg = genRegArgNext(targetReg);
+ targetMask = genRegMask(targetReg);
+ }
+ }
+ else
+ {
+#ifdef DEBUG
+ compiler->gtDispTreeRange(BlockRange(), argNode);
+#endif
+ noway_assert(!"Unsupported TYP_STRUCT arg kind");
+ }
+
+ unsigned slots = ((unsigned)(roundUp(originalSize, REGSIZE_BYTES))) / REGSIZE_BYTES;
+ regNumber curReg = argReg;
+ regNumber lastReg = argIsFloat ? REG_ARG_FP_LAST : REG_ARG_LAST;
+ unsigned remainingSlots = slots;
+
+ while (remainingSlots > 0)
+ {
+ argMask |= genRegMask(curReg);
+ remainingSlots--;
+
+ if (curReg == lastReg)
+ break;
+
+ curReg = genRegArgNext(curReg);
+ }
+
+ // Struct typed arguments must be fully passed in registers (Reg/Stk split not allowed)
+ noway_assert(remainingSlots == 0);
+ argNode->gtLsraInfo.internalIntCount = 0;
+ }
+ else // A scalar argument (not a struct)
+ {
+ // We consume one source
+ info->srcCount++;
+
+ argMask |= genRegMask(argReg);
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // To avoid redundant moves, request that the argument child tree be
+ // computed in the register in which the argument is passed to the call.
+ putArgChild->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+ }
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+
+ // Skip arguments that have been moved to the Late Arg list
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, arg);
+ assert(curArgTabEntry);
+
+ assert(curArgTabEntry->regNum == REG_STK);
+
+ TreeNodeInfoInitPutArgStk(arg->AsPutArgStk(), curArgTabEntry);
+ }
+ else
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ argInfo->dstCount = 0;
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+ // If it is a fast tail call, it is already preferenced to use IP0.
+    // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Arm64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK node
+//
+// Arguments:
+// argNode - a GT_PUTARG_STK node
+//
+// Return Value:
+// None.
+//
+// Notes:
+// Set the child node(s) to be contained when we have a multireg arg
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* argNode, fgArgTabEntryPtr info)
+{
+ assert(argNode->gtOper == GT_PUTARG_STK);
+
+ GenTreePtr putArgChild = argNode->gtOp.gtOp1;
+
+ // Initialize 'argNode' as not contained, as this is both the default case
+ // and how MakeSrcContained expects to find things setup.
+ //
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+ // Do we have a TYP_STRUCT argument (or a GT_FIELD_LIST), if so it must be a multireg pass-by-value struct
+ if ((putArgChild->TypeGet() == TYP_STRUCT) || (putArgChild->OperGet() == GT_FIELD_LIST))
+ {
+ // We will use store instructions that each write a register sized value
+
+ if (putArgChild->OperGet() == GT_FIELD_LIST)
+ {
+ // We consume all of the items in the GT_FIELD_LIST
+ argNode->gtLsraInfo.srcCount = info->numSlots;
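+            // (For illustration: a 24-byte struct passed on the stack occupies three
+            // 8-byte slots, so srcCount becomes 3.)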
+ }
+ else
+ {
+ // We could use a ldp/stp sequence so we need two internal registers
+ argNode->gtLsraInfo.internalIntCount = 2;
+
+ if (putArgChild->OperGet() == GT_OBJ)
+ {
+ GenTreePtr objChild = putArgChild->gtOp.gtOp1;
+ if (objChild->OperGet() == GT_LCL_VAR_ADDR)
+ {
+ // We will generate all of the code for the GT_PUTARG_STK, the GT_OBJ and the GT_LCL_VAR_ADDR
+ // as one contained operation
+ //
+ MakeSrcContained(putArgChild, objChild);
+ }
+ }
+
+            // We will generate all of the code for the GT_PUTARG_STK and its child node
+ // as one contained operation
+ //
+ MakeSrcContained(argNode, putArgChild);
+ }
+ }
+ else
+ {
+ // We must not have a multi-reg struct
+ assert(info->numSlots == 1);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+// Notes:
+
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address and initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
+
+ if (!isInitBlk)
+ {
+ // CopyObj or CopyBlk
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddrOrFill->gtLsraInfo.setDstCount(1);
+ srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ }
+
+ if (isInitBlk)
+ {
+ GenTreePtr initVal = source;
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+ srcAddrOrFill = initVal;
+
+#if 0
+ if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
+ {
+ // TODO-ARM64-CQ: Currently we generate a helper call for every
+ // initblk we encounter. Later on we should implement loop unrolling
+ // code sequences to improve CQ.
+ // For reference see the code in lsraxarch.cpp.
+ }
+ else
+#endif // 0
+ {
+ assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
+ // The helper follows the regular ABI.
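+            // That is, the destination address is forced into the first argument register,
+            // the fill value into the second, and the size ends up in the third (a reserved
+            // temp for a known size, or the GT_STORE_DYN_BLK size operand otherwise).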
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ initVal->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_ARG_2);
+ blkNode->gtLsraInfo.internalIntCount = 1;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* sizeNode = blkNode->AsDynBlk()->gtDynamicSize;
+ sizeNode->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ // Sources are src and dest and size if not constant.
+
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ // CopyObj
+
+ // We don't need to materialize the struct size but we still need
+ // a temporary register to perform the sequence of loads and stores.
+ blkNode->gtLsraInfo.internalIntCount = 1;
+
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_DST_BYREF);
+ // If we have a source address we want it in REG_WRITE_BARRIER_SRC_BYREF.
+ // Otherwise, if it is a local, codegen will put its address in REG_WRITE_BARRIER_SRC_BYREF,
+ // which is killed by a StoreObj (and thus needn't be reserved).
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_WRITE_BARRIER_SRC_BYREF);
+ }
+ }
+ else
+ {
+ // CopyBlk
+ short internalIntCount = 0;
+ regMaskTP internalIntCandidates = RBM_NONE;
+
+#if 0
+ if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindUnroll)
+ {
+ // TODO-ARM64-CQ: cpblk loop unrolling is currently not implemented.
+ // In case of a CpBlk with a constant size and less than CPBLK_UNROLL_LIMIT size
+ // we should unroll the loop to improve CQ.
+ // For reference see the code in lsraxarch.cpp.
+ }
+ else
+#endif // 0
+ {
+ assert(blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindHelper);
+ dstAddr->gtLsraInfo.setSrcCandidates(l, RBM_ARG_0);
+ // The srcAddr goes in arg1.
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, RBM_ARG_1);
+ }
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ internalIntCandidates |= RBM_ARG_2;
+ internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is a third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ blockSize->gtLsraInfo.setSrcCandidates(l, RBM_ARG_2);
+ }
+ }
+ if (internalIntCount != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount = internalIntCount;
+ blkNode->gtLsraInfo.setInternalCandidates(l, internalIntCandidates);
+ }
+ }
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ NYI("TreeNodeInfoInitSIMD");
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ info->dstCount = 1;
+ switch (simdTree->gtSIMDIntrinsicID)
+    {
+        GenTree* op2;
+
+ case SIMDIntrinsicInit:
+ {
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or int constant of all 1's.
+ info->srcCount = 1;
+ GenTree* op1 = tree->gtOp.gtOp1;
+ if (op1->IsIntegralConst(0) || (simdTree->gtSIMDBaseType == TYP_INT && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffff) ||
+ (simdTree->gtSIMDBaseType == TYP_LONG && op1->IsCnsIntOrI() &&
+ op1->AsIntConCommon()->IconValue() == 0xffffffffffffffffLL))
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ info->srcCount = 0;
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ info->srcCount = (int)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+ // Need an internal register to stitch together all the values into a single vector in an XMM reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // This gets implemented as bitwise-And operation with a mask
+ // and hence should never see it here.
+ unreached();
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicEqual is supported only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGreaterThanOrEqual:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+
+ // a >= b = (a==b) | (a>b)
+ // To hold intermediate result of a==b and a>b we need two distinct
+ // registers. We can use targetReg and one internal reg provided
+ // they are distinct which is not guaranteed. Therefore, we request
+ // two internal registers so that one of the internal registers has
+ // to be different from targetReg.
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // Need two SIMD registers as scratch.
+            // See genSIMDIntrinsicRelOp() for details on the code sequence generated and
+ // the need for two scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ // Also need an internal register as scratch. Further we need that targetReg and internal reg
+ // are two distinct regs. It is achieved by requesting two internal registers and one of them
+ // has to be different from targetReg.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated and
+ // the need for scratch registers.
+ info->srcCount = 2;
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ break;
+
+ case SIMDIntrinsicGetItem:
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+
+ op2 = tree->gtGetOp2();
+
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType) && !op2->IsIntegralConst(0))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetLength:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+{
+ GenTreePtr dst = tree;
+ GenTreePtr addr = tree->gtOp.gtOp1;
+ GenTreePtr src = tree->gtOp.gtOp2;
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ short leaSrcCount = 0;
+ if (lea->Base() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ if (lea->Index() != nullptr)
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+#if NOGC_WRITE_BARRIERS
+ // For the NOGC JIT Helper calls
+ //
+ // the 'addr' goes into x14 (REG_WRITE_BARRIER_DST_BYREF)
+ // the 'src' goes into x15 (REG_WRITE_BARRIER)
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_DST_BYREF);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+#else
+ // For the standard JIT Helper calls
+ // op1 goes into REG_ARG_0 and
+ // op2 goes into REG_ARG_1
+ //
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+#endif // NOGC_WRITE_BARRIERS
+
+    // Both the address and the source must reside in registers, which they should since
+    // we haven't marked either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// TreeNodeInfoInitIndir: Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND or GT_STOREIND gentree node
+//
+void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
+{
+ assert(indirTree->OperIsIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned cns = 0;
+ unsigned mul;
+ bool rev;
+ bool modifiedSources = false;
+
+ if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+ base = lea->Base();
+ index = lea->Index();
+ cns = lea->gtOffset;
+
+ m_lsra->clearOperandCounts(addr);
+ // The srcCount is decremented because addr is now "contained",
+ // then we account for the base and index below, if they are non-null.
+ info->srcCount--;
+ }
+ else if (comp->codeGen->genCreateAddrMode(addr, -1, true, 0, &rev, &base, &index, &mul, &cns, true /*nogen*/) &&
+ !(modifiedSources = AreSourcesPossiblyModifiedLocals(indirTree, base, index)))
+ {
+ // An addressing mode will be constructed that may cause some
+ // nodes to not need a register, and cause others' lifetimes to be extended
+ // to the GT_IND or even its parent if it's an assignment
+
+ assert(base != addr);
+ m_lsra->clearOperandCounts(addr);
+
+ GenTreePtr arrLength = nullptr;
+
+ // Traverse the computation below GT_IND to find the operands
+ // for the addressing mode, marking the various constants and
+ // intermediate results as not consuming/producing.
+ // If the traversal were more complex, we might consider using
+ // a traversal function, but the addressing mode is only made
+ // up of simple arithmetic operators, and the code generator
+ // only traverses one leg of each node.
+
+ bool foundBase = (base == nullptr);
+ bool foundIndex = (index == nullptr);
+ GenTreePtr nextChild = nullptr;
+ for (GenTreePtr child = addr; child != nullptr && !child->OperIsLeaf(); child = nextChild)
+ {
+ nextChild = nullptr;
+ GenTreePtr op1 = child->gtOp.gtOp1;
+ GenTreePtr op2 = (child->OperIsBinary()) ? child->gtOp.gtOp2 : nullptr;
+
+ if (op1 == base)
+ {
+ foundBase = true;
+ }
+ else if (op1 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op1);
+ if (!op1->OperIsLeaf())
+ {
+ nextChild = op1;
+ }
+ }
+
+ if (op2 != nullptr)
+ {
+ if (op2 == base)
+ {
+ foundBase = true;
+ }
+ else if (op2 == index)
+ {
+ foundIndex = true;
+ }
+ else
+ {
+ m_lsra->clearOperandCounts(op2);
+ if (!op2->OperIsLeaf())
+ {
+ assert(nextChild == nullptr);
+ nextChild = op2;
+ }
+ }
+ }
+ }
+ assert(foundBase && foundIndex);
+ info->srcCount--; // it gets incremented below.
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+        // We also consume the target register in which the address is
+        // computed.
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+ else
+ {
+ // it is nothing but a plain indir
+ info->srcCount--; // base gets added in below
+ base = addr;
+ }
+
+ if (base != nullptr)
+ {
+ info->srcCount++;
+ }
+
+ if (index != nullptr && !modifiedSources)
+ {
+ info->srcCount++;
+ }
+
+    // On ARM64 we may need a single internal register.
+    // (Even when both of the conditions below are true, we still only need one internal register.)
+ if ((index != nullptr) && (cns != 0))
+ {
+ // ARM64 does not support both Index and offset so we need an internal register
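+        // (ldr/str can encode either a register index, [base, index], or an immediate offset,
+        // [base, #imm], but not both at once.)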
+ info->internalIntCount = 1;
+ }
+ else if (!emitter::emitIns_valid_imm_for_ldst_offset(cns, emitTypeSize(indirTree)))
+ {
+ // This offset can't be contained in the ldr/str instruction, so we need an internal register
+ info->internalIntCount = 1;
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCmp: Set the register requirements for a compare.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+}
+
+#endif // _TARGET_ARM64_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp
new file mode 100644
index 0000000000..a4da2b7ce6
--- /dev/null
+++ b/src/jit/lsraxarch.cpp
@@ -0,0 +1,3684 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX Register Requirements for AMD64 XX
+XX XX
+XX This encapsulates all the logic for setting register requirements for XX
+XX the AMD64 architecture. XX
+XX XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef LEGACY_BACKEND // This file is ONLY used for the RyuJIT backend that uses the linear scan register allocator
+
+#ifdef _TARGET_XARCH_
+
+#include "jit.h"
+#include "sideeffects.h"
+#include "lower.h"
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitStoreLoc: Set register requirements for a store of a lclVar
+//
+// Arguments:
+// storeLoc - the local store (GT_STORE_LCL_FLD or GT_STORE_LCL_VAR)
+//
+// Notes:
+// This involves:
+// - Setting the appropriate candidates for a store of a multi-reg call return value.
+// - Requesting an internal register for SIMD12 stores.
+// - Handling of contained immediates.
+// - Widening operations of unsigneds. (TODO: Move to 1st phase of Lowering)
+
+void Lowering::TreeNodeInfoInitStoreLoc(GenTreeLclVarCommon* storeLoc)
+{
+ TreeNodeInfo* info = &(storeLoc->gtLsraInfo);
+
+ // Is this the case of var = call where call is returning
+ // a value in multiple return registers?
+ GenTree* op1 = storeLoc->gtGetOp1();
+ if (op1->IsMultiRegCall())
+ {
+ // backend expects to see this case only for store lclvar.
+ assert(storeLoc->OperGet() == GT_STORE_LCL_VAR);
+
+ // srcCount = number of registers in which the value is returned by call
+ GenTreeCall* call = op1->AsCall();
+ ReturnTypeDesc* retTypeDesc = call->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+
+ // Call node srcCandidates = Bitwise-OR(allregs(GetReturnRegType(i))) for all i=0..RetRegCount-1
+ regMaskTP srcCandidates = m_lsra->allMultiRegCallNodeRegs(call);
+ op1->gtLsraInfo.setSrcCandidates(m_lsra, srcCandidates);
+ return;
+ }
+
+#ifdef FEATURE_SIMD
+ if (varTypeIsSIMD(storeLoc))
+ {
+ if (op1->IsCnsIntOrI())
+ {
+ // InitBlk
+ MakeSrcContained(storeLoc, op1);
+ }
+ else if (storeLoc->TypeGet() == TYP_SIMD12)
+ {
+ // Need an additional register to extract upper 4 bytes of Vector3.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+
+ // In this case don't mark the operand as contained as we want it to
+ // be evaluated into an xmm register
+ }
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
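+    // For example, "xor reg, reg; mov dword ptr [mem], reg" is smaller than
+    // "mov dword ptr [mem], 0", which has to encode a 4-byte immediate.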
+ if (IsContainableImmed(storeLoc, op1) && (!op1->IsIntegralConst(0) || varTypeIsSmall(storeLoc)))
+ {
+ MakeSrcContained(storeLoc, op1);
+ }
+
+ // TODO: This should be moved to Lowering, but it widens the types, which changes the behavior
+ // of the above condition.
+ LowerStoreLoc(storeLoc);
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInit: Set register requirements for a node
+//
+// Arguments:
+// treeNode - the node of interest
+//
+// Notes:
+// Preconditions:
+//    LSRA has been initialized and there is a TreeNodeInfo node
+// already allocated and initialized for every tree in the IR.
+// Postconditions:
+// Every TreeNodeInfo instance has the right annotations on register
+// requirements needed by LSRA to build the Interval Table (source,
+// destination and internal [temp] register counts).
+//
+void Lowering::TreeNodeInfoInit(GenTree* tree)
+{
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+#ifdef DEBUG
+ if (comp->verbose)
+ {
+ printf("TreeNodeInfoInit:\n");
+ comp->gtDispTreeRange(BlockRange(), tree);
+ }
+#endif
+    // Floating-point types generate AVX instructions (vmovss etc.), so set the flag.
+ SetContainsAVXFlags(varTypeIsFloating(tree->TypeGet()));
+ switch (tree->OperGet())
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ default:
+ TreeNodeInfoInitSimple(tree);
+ break;
+
+ case GT_LCL_FLD:
+ case GT_LCL_VAR:
+ info->srcCount = 0;
+ info->dstCount = 1;
+
+#ifdef FEATURE_SIMD
+ // Need an additional register to read upper 4 bytes of Vector3.
+ if (tree->TypeGet() == TYP_SIMD12)
+ {
+ // We need an internal register different from targetReg in which 'tree' produces its result
+ // because both targetReg and internal reg will be in use at the same time.
+ info->internalFloatCount = 1;
+ info->isInternalRegDelayFree = true;
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+ }
+#endif
+ break;
+
+ case GT_STORE_LCL_FLD:
+ case GT_STORE_LCL_VAR:
+#ifdef _TARGET_X86_
+ if (tree->gtGetOp1()->OperGet() == GT_LONG)
+ {
+ info->srcCount = 2;
+ }
+ else
+#endif // _TARGET_X86_
+ {
+ info->srcCount = 1;
+ }
+ info->dstCount = 0;
+ TreeNodeInfoInitStoreLoc(tree->AsLclVarCommon());
+ break;
+
+ case GT_BOX:
+ noway_assert(!"box should not exist here");
+ // The result of 'op1' is also the final result
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_PHYSREGDST:
+ info->srcCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_COMMA:
+ {
+ GenTreePtr firstOperand;
+ GenTreePtr secondOperand;
+ if (tree->gtFlags & GTF_REVERSE_OPS)
+ {
+ firstOperand = tree->gtOp.gtOp2;
+ secondOperand = tree->gtOp.gtOp1;
+ }
+ else
+ {
+ firstOperand = tree->gtOp.gtOp1;
+ secondOperand = tree->gtOp.gtOp2;
+ }
+ if (firstOperand->TypeGet() != TYP_VOID)
+ {
+ firstOperand->gtLsraInfo.isLocalDefUse = true;
+ firstOperand->gtLsraInfo.dstCount = 0;
+ }
+ if (tree->TypeGet() == TYP_VOID && secondOperand->TypeGet() != TYP_VOID)
+ {
+ secondOperand->gtLsraInfo.isLocalDefUse = true;
+ secondOperand->gtLsraInfo.dstCount = 0;
+ }
+ }
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_LIST:
+ case GT_FIELD_LIST:
+ case GT_ARGPLACE:
+ case GT_NO_OP:
+ case GT_START_NONGC:
+ case GT_PROF_HOOK:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_CNS_DBL:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+#if !defined(_TARGET_64BIT_)
+
+ case GT_LONG:
+ if ((tree->gtLIRFlags & LIR::Flags::IsUnusedValue) != 0)
+ {
+ // An unused GT_LONG node needs to consume its sources.
+ info->srcCount = 2;
+ }
+ else
+ {
+ // Passthrough
+ info->srcCount = 0;
+ }
+
+ info->dstCount = 0;
+ break;
+
+#endif // !defined(_TARGET_64BIT_)
+
+ case GT_QMARK:
+ case GT_COLON:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ unreached();
+ break;
+
+ case GT_RETURN:
+ TreeNodeInfoInitReturn(tree);
+ break;
+
+ case GT_RETFILT:
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+ }
+ else
+ {
+ assert(tree->TypeGet() == TYP_INT);
+
+ info->srcCount = 1;
+ info->dstCount = 0;
+
+ info->setSrcCandidates(l, RBM_INTRET);
+ tree->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, RBM_INTRET);
+ }
+ break;
+
+        // A GT_NOP is a passthrough if it is void or if it has a child, but it must be
+        // considered to produce a dummy value if it has a type and no child.
+ case GT_NOP:
+ info->srcCount = 0;
+ if (tree->TypeGet() != TYP_VOID && tree->gtOp.gtOp1 == nullptr)
+ {
+ info->dstCount = 1;
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+ break;
+
+ case GT_JTRUE:
+ {
+ info->srcCount = 0;
+ info->dstCount = 0;
+
+ GenTree* cmp = tree->gtGetOp1();
+ l->clearDstCount(cmp);
+
+#ifdef FEATURE_SIMD
+ // Say we have the following IR
+ // simdCompareResult = GT_SIMD((In)Equality, v1, v2)
+ // integerCompareResult = GT_EQ/NE(simdCompareResult, true/false)
+ // GT_JTRUE(integerCompareResult)
+ //
+ // In this case we don't need to generate code for GT_EQ_/NE, since SIMD (In)Equality
+ // intrinsic will set or clear the Zero flag.
+
+ genTreeOps cmpOper = cmp->OperGet();
+ if (cmpOper == GT_EQ || cmpOper == GT_NE)
+ {
+ GenTree* cmpOp1 = cmp->gtGetOp1();
+ GenTree* cmpOp2 = cmp->gtGetOp2();
+
+ if (cmpOp1->IsSIMDEqualityOrInequality() && (cmpOp2->IsIntegralConst(0) || cmpOp2->IsIntegralConst(1)))
+ {
+ // We always generate code for a SIMD equality comparison, but the compare
+ // is contained (evaluated as part of the GT_JTRUE).
+ // Neither the SIMD node nor the immediate need to be evaluated into a register.
+ l->clearOperandCounts(cmp);
+ l->clearDstCount(cmpOp1);
+ l->clearOperandCounts(cmpOp2);
+
+ // Codegen of SIMD (in)Equality uses target integer reg only for setting flags.
+ // A target reg is not needed on AVX when comparing against Vector Zero.
+ // In all other cases we need to reserve an int type internal register, since we
+ // have cleared dstCount.
+ if (!compiler->canUseAVX() || !cmpOp1->gtGetOp2()->IsIntegralConstVector(0))
+ {
+ ++(cmpOp1->gtLsraInfo.internalIntCount);
+ regMaskTP internalCandidates = cmpOp1->gtLsraInfo.getInternalCandidates(l);
+ internalCandidates |= l->allRegs(TYP_INT);
+ cmpOp1->gtLsraInfo.setInternalCandidates(l, internalCandidates);
+ }
+
+ // We have to reverse compare oper in the following cases:
+ // 1) SIMD Equality: Sets Zero flag on equal otherwise clears it.
+ // Therefore, if compare oper is == or != against false(0), we will
+ // be checking opposite of what is required.
+ //
+ // 2) SIMD inEquality: Clears Zero flag on true otherwise sets it.
+ // Therefore, if compare oper is == or != against true(1), we will
+ // be checking opposite of what is required.
+ GenTreeSIMD* simdNode = cmpOp1->AsSIMD();
+ if (simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpEquality)
+ {
+ if (cmpOp2->IsIntegralConst(0))
+ {
+ cmp->SetOper(GenTree::ReverseRelop(cmpOper));
+ }
+ }
+ else
+ {
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicOpInEquality);
+ if (cmpOp2->IsIntegralConst(1))
+ {
+ cmp->SetOper(GenTree::ReverseRelop(cmpOper));
+ }
+ }
+ }
+ }
+#endif // FEATURE_SIMD
+ }
+ break;
+
+ case GT_JCC:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_JMP:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_SWITCH:
+ // This should never occur since switch nodes must not be visible at this
+ // point in the JIT.
+ info->srcCount = 0;
+ info->dstCount = 0; // To avoid getting uninit errors.
+ noway_assert(!"Switch must be lowered at this point");
+ break;
+
+ case GT_JMPTABLE:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ break;
+
+ case GT_SWITCH_TABLE:
+ info->srcCount = 2;
+ info->internalIntCount = 1;
+ info->dstCount = 0;
+ break;
+
+ case GT_ASG:
+ case GT_ASG_ADD:
+ case GT_ASG_SUB:
+ noway_assert(!"We should never hit any assignment operator in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+#if !defined(_TARGET_64BIT_)
+ case GT_ADD_LO:
+ case GT_ADD_HI:
+ case GT_SUB_LO:
+ case GT_SUB_HI:
+#endif
+ case GT_ADD:
+ case GT_SUB:
+            // SSE2 arithmetic instructions don't support the form "op mem, xmm";
+            // they only support the "op xmm, mem/xmm" form.
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // overflow operations aren't supported on float/double types.
+ assert(!tree->gtOverflow());
+
+ op1 = tree->gtGetOp1();
+ op2 = tree->gtGetOp2();
+
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (tree->OperIsCommutative() &&
+ (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))))
+ {
+                    // Though we have GT_ADD(op1=memOp, op2=non-memOp), we try to reorder the operands
+ // as long as it is safe so that the following efficient code sequence is generated:
+ // addss/sd targetReg, memOp (if op1Reg == targetReg) OR
+ // movaps targetReg, op2Reg; addss/sd targetReg, [memOp]
+ //
+ // Instead of
+ // movss op1Reg, [memOp]; addss/sd targetReg, Op2Reg (if op1Reg == targetReg) OR
+                    //    movss op1Reg, [memOp]; movaps targetReg, op1Reg; addss/sd targetReg, Op2Reg
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ break;
+ }
+
+ __fallthrough;
+
+ case GT_AND:
+ case GT_OR:
+ case GT_XOR:
+ TreeNodeInfoInitLogicalOp(tree);
+ break;
+
+ case GT_RETURNTRAP:
+ // This just turns into a compare of its child with an int + a conditional call
+ info->srcCount = 1;
+ info->dstCount = 0;
+ if (tree->gtOp.gtOp1->isIndir())
+ {
+ MakeSrcContained(tree, tree->gtOp.gtOp1);
+ }
+ info->internalIntCount = 1;
+ info->setInternalCandidates(l, l->allRegs(TYP_INT));
+ break;
+
+ case GT_MOD:
+ case GT_DIV:
+ case GT_UMOD:
+ case GT_UDIV:
+ TreeNodeInfoInitModDiv(tree);
+ break;
+
+ case GT_MUL:
+ case GT_MULHI:
+#if defined(_TARGET_X86_) && !defined(LEGACY_BACKEND)
+ case GT_MUL_LONG:
+#endif
+ TreeNodeInfoInitMul(tree);
+ break;
+
+ case GT_INTRINSIC:
+ TreeNodeInfoInitIntrinsic(tree);
+ break;
+
+#ifdef FEATURE_SIMD
+ case GT_SIMD:
+ TreeNodeInfoInitSIMD(tree);
+ break;
+#endif // FEATURE_SIMD
+
+ case GT_CAST:
+ TreeNodeInfoInitCast(tree);
+ break;
+
+ case GT_NEG:
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // TODO-XArch-CQ:
+ // SSE instruction set doesn't have an instruction to negate a number.
+ // The recommended way is to xor the float/double number with a bitmask.
+ // The only way to xor is using xorps or xorpd both of which operate on
+ // 128-bit operands. To hold the bit-mask we would need another xmm
+ // register or a 16-byte aligned 128-bit data constant. Right now emitter
+ // lacks the support for emitting such constants or instruction with mem
+ // addressing mode referring to a 128-bit operand. For now we use an
+ // internal xmm register to load 32/64-bit bitmask from data section.
+ // Note that by trading additional data section memory (128-bit) we can
+ // save on the need for an internal register and also a memory-to-reg
+ // move.
+ //
+ // Note: another option to avoid internal register requirement is by
+ // lowering as GT_SUB(0, src). This will generate code different from
+ // Jit64 and could possibly result in compat issues (?).
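+            // Concretely, the negation is performed by loading the sign-bit mask (e.g. 0x80000000
+            // for a float) from the data section into the internal xmm register requested below
+            // and xor'ing it with the value, which flips the sign bit.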
+ if (varTypeIsFloating(tree))
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ else
+ {
+ // Codegen of this tree node sets ZF and SF flags.
+ tree->gtFlags |= GTF_ZSF_SET;
+ }
+ break;
+
+ case GT_NOT:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ break;
+
+ case GT_LSH:
+ case GT_RSH:
+ case GT_RSZ:
+ case GT_ROL:
+ case GT_ROR:
+#ifdef _TARGET_X86_
+ case GT_LSH_HI:
+ case GT_RSH_LO:
+#endif
+ TreeNodeInfoInitShiftRotate(tree);
+ break;
+
+ case GT_EQ:
+ case GT_NE:
+ case GT_LT:
+ case GT_LE:
+ case GT_GE:
+ case GT_GT:
+ case GT_TEST_EQ:
+ case GT_TEST_NE:
+ TreeNodeInfoInitCmp(tree);
+ break;
+
+ case GT_CKFINITE:
+ info->srcCount = 1;
+ info->dstCount = 1;
+ info->internalIntCount = 1;
+ break;
+
+ case GT_CMPXCHG:
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // comparand is preferenced to RAX.
+ // Remaining two operands can be in any reg other than RAX.
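+            // This mirrors the x86 cmpxchg semantics: the instruction implicitly compares RAX with
+            // the location and, on failure, loads the location's current value into RAX, which is
+            // also why the result is constrained to RAX below.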
+ tree->gtCmpXchg.gtOpComparand->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ tree->gtCmpXchg.gtOpLocation->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtCmpXchg.gtOpValue->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RAX);
+ tree->gtLsraInfo.setDstCandidates(l, RBM_RAX);
+ break;
+
+ case GT_LOCKADD:
+ info->srcCount = 2;
+ info->dstCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+
+ CheckImmedAndMakeContained(tree, tree->gtOp.gtOp2);
+ break;
+
+ case GT_CALL:
+ TreeNodeInfoInitCall(tree->AsCall());
+ break;
+
+ case GT_ADDR:
+ {
+ // For a GT_ADDR, the child node should not be evaluated into a register
+ GenTreePtr child = tree->gtOp.gtOp1;
+ assert(!l->isCandidateLocalRef(child));
+ l->clearDstCount(child);
+ info->srcCount = 0;
+ info->dstCount = 1;
+ }
+ break;
+
+#if !defined(FEATURE_PUT_STRUCT_ARG_STK)
+ case GT_OBJ:
+#endif
+ case GT_BLK:
+ case GT_DYN_BLK:
+ // These should all be eliminated prior to Lowering.
+ assert(!"Non-store block node in Lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ case GT_PUTARG_STK:
+ LowerPutArgStk(tree->AsPutArgStk());
+ TreeNodeInfoInitPutArgStk(tree->AsPutArgStk());
+ break;
+#endif // FEATURE_PUT_STRUCT_ARG_STK
+
+ case GT_STORE_BLK:
+ case GT_STORE_OBJ:
+ case GT_STORE_DYN_BLK:
+ LowerBlockStore(tree->AsBlk());
+ TreeNodeInfoInitBlockStore(tree->AsBlk());
+ break;
+
+ case GT_INIT_VAL:
+ // Always a passthrough of its child's value.
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_LCLHEAP:
+ TreeNodeInfoInitLclHeap(tree);
+ break;
+
+ case GT_ARR_BOUNDS_CHECK:
+#ifdef FEATURE_SIMD
+ case GT_SIMD_CHK:
+#endif // FEATURE_SIMD
+ {
+ GenTreeBoundsChk* node = tree->AsBoundsChk();
+ // Consumes arrLen & index - has no result
+ info->srcCount = 2;
+ info->dstCount = 0;
+
+ GenTreePtr other;
+ if (CheckImmedAndMakeContained(tree, node->gtIndex))
+ {
+ other = node->gtArrLen;
+ }
+ else if (CheckImmedAndMakeContained(tree, node->gtArrLen))
+ {
+ other = node->gtIndex;
+ }
+ else if (node->gtIndex->isMemoryOp())
+ {
+ other = node->gtIndex;
+ }
+ else
+ {
+ other = node->gtArrLen;
+ }
+
+ if (node->gtIndex->TypeGet() == node->gtArrLen->TypeGet())
+ {
+ if (other->isMemoryOp())
+ {
+ MakeSrcContained(tree, other);
+ }
+ else
+ {
+ // We can mark 'other' as reg optional, since it is not contained.
+ SetRegOptional(other);
+ }
+ }
+ }
+ break;
+
+ case GT_ARR_ELEM:
+ // These must have been lowered to GT_ARR_INDEX
+ noway_assert(!"We should never see a GT_ARR_ELEM in lowering");
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+
+ case GT_ARR_INDEX:
+ info->srcCount = 2;
+ info->dstCount = 1;
+ // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple
+ // times while the result is being computed.
+ tree->AsArrIndex()->ArrObj()->gtLsraInfo.isDelayFree = true;
+ info->hasDelayFreeSrc = true;
+ break;
+
+ case GT_ARR_OFFSET:
+ // This consumes the offset, if any, the arrObj and the effective index,
+ // and produces the flattened offset for this dimension.
+ info->srcCount = 3;
+ info->dstCount = 1;
+
+ // we don't want to generate code for this
+ if (tree->gtArrOffs.gtOffset->IsIntegralConst(0))
+ {
+ MakeSrcContained(tree, tree->gtArrOffs.gtOffset);
+ }
+ else
+ {
+ // Here we simply need an internal register, which must be different
+ // from any of the operand's registers, but may be the same as targetReg.
+ info->internalIntCount = 1;
+ }
+ break;
+
+ case GT_LEA:
+ // The LEA usually passes its operands through to the GT_IND, in which case we'll
+ // clear the info->srcCount and info->dstCount later, but we may be instantiating an address,
+ // so we set them here.
+ info->srcCount = 0;
+ if (tree->AsAddrMode()->HasBase())
+ {
+ info->srcCount++;
+ }
+ if (tree->AsAddrMode()->HasIndex())
+ {
+ info->srcCount++;
+ }
+ info->dstCount = 1;
+ break;
+
+ case GT_STOREIND:
+ {
+ info->srcCount = 2;
+ info->dstCount = 0;
+ GenTree* src = tree->gtOp.gtOp2;
+
+ if (compiler->codeGen->gcInfo.gcIsWriteBarrierAsgNode(tree))
+ {
+ TreeNodeInfoInitGCWriteBarrier(tree);
+ break;
+ }
+
+ // If the source is a containable immediate, make it contained, unless it is
+ // an int-size or larger store of zero to memory, because we can generate smaller code
+ // by zeroing a register and then storing it.
+ if (IsContainableImmed(tree, src) &&
+ (!src->IsIntegralConst(0) || varTypeIsSmall(tree) || tree->gtGetOp1()->OperGet() == GT_CLS_VAR_ADDR))
+ {
+ MakeSrcContained(tree, src);
+ }
+ else if (!varTypeIsFloating(tree))
+ {
+ // Perform recognition of trees with the following structure:
+ // StoreInd(addr, BinOp(expr, GT_IND(addr)))
+ // to be able to fold this into an instruction of the form
+ // BINOP [addr], register
+ // where register is the actual place where 'expr' is computed.
+ //
+ // SSE2 doesn't support RMW form of instructions.
+ if (TreeNodeInfoInitIfRMWMemOp(tree))
+ {
+ break;
+ }
+ }
+
+ TreeNodeInfoInitIndir(tree);
+ }
+ break;
+
+ case GT_NULLCHECK:
+ info->dstCount = 0;
+ info->srcCount = 1;
+ info->isLocalDefUse = true;
+ break;
+
+ case GT_IND:
+ info->dstCount = 1;
+ info->srcCount = 1;
+ TreeNodeInfoInitIndir(tree);
+ break;
+
+ case GT_CATCH_ARG:
+ info->srcCount = 0;
+ info->dstCount = 1;
+ info->setDstCandidates(l, RBM_EXCEPTION_OBJECT);
+ break;
+
+#if !FEATURE_EH_FUNCLETS
+ case GT_END_LFIN:
+ info->srcCount = 0;
+ info->dstCount = 0;
+ break;
+#endif
+
+ case GT_CLS_VAR:
+ // These nodes are eliminated by rationalizer.
+ JITDUMP("Unexpected node %s in Lower.\n", GenTree::NodeName(tree->OperGet()));
+ unreached();
+ break;
+ } // end switch (tree->OperGet())
+
+ // If op2 of a binary-op gets marked as contained, then binary-op srcCount will be 1.
+ // Even then we would like to set isTgtPref on Op1.
+ if (tree->OperIsBinary() && info->srcCount >= 1)
+ {
+ if (isRMWRegOper(tree))
+ {
+ GenTree* op1 = tree->gtOp.gtOp1;
+ GenTree* op2 = tree->gtOp.gtOp2;
+
+ // Commutative opers like add/mul/and/or/xor could reverse the order of
+ // operands if it is safe to do so. In such a case we would like op2 to be
+ // target preferenced instead of op1.
+ if (tree->OperIsCommutative() && op1->gtLsraInfo.dstCount == 0 && op2 != nullptr)
+ {
+ op1 = op2;
+ op2 = tree->gtOp.gtOp1;
+ }
+
+ // If we have a read-modify-write operation, we want to preference op1 to the target.
+ // If op1 is contained, we don't want to preference it, but it won't
+ // show up as a source in that case, so it will be ignored.
+ op1->gtLsraInfo.isTgtPref = true;
+
+ // Is this a non-commutative operator, or is op2 a contained memory op?
+ // (Note that we can't call IsContained() at this point because it uses exactly the
+ // same information we're currently computing.)
+ // In either case, we need to make op2 remain live until the op is complete, by marking
+ // the source(s) associated with op2 as "delayFree".
+ // Note that if op2 of a binary RMW operator is a memory op, even if the operator
+ // is commutative, codegen cannot reverse them.
+ // TODO-XArch-CQ: This is not actually the case for all RMW binary operators, but there's
+ // more work to be done to correctly reverse the operands if they involve memory
+ // operands. Also, we may need to handle more cases than GT_IND, especially once
+ // we've modified the register allocator to not require all nodes to be assigned
+ // a register (e.g. a spilled lclVar can often be referenced directly from memory).
+ // Note that we may have a null op2, even with 2 sources, if op1 is a base/index memory op.
+
+ GenTree* delayUseSrc = nullptr;
+ // TODO-XArch-Cleanup: We should make the indirection explicit on these nodes so that we don't have
+ // to special case them.
+ if (tree->OperGet() == GT_XADD || tree->OperGet() == GT_XCHG || tree->OperGet() == GT_LOCKADD)
+ {
+ // These tree nodes will have their op1 marked as isDelayFree=true.
+ // Hence these tree nodes should have a Def position so that op1's reg
+ // gets freed at DefLoc+1.
+ if (tree->TypeGet() == TYP_VOID)
+ {
+ // Right now a GT_XADD node could be morphed into a
+ // GT_LOCKADD of TYP_VOID. See gtExtractSideEffList().
+ // Note that it is advantageous to use GT_LOCKADD
+                    // instead of GT_XADD, as the former uses lock.add,
+                    // which allows its second operand to be a contained
+                    // immediate, whereas the xadd instruction requires its
+                    // second operand to be in a register.
+ assert(tree->gtLsraInfo.dstCount == 0);
+
+ // Give it an artificial type and mark it isLocalDefUse = true.
+ // This would result in a Def position created but not considered
+ // consumed by its parent node.
+ tree->gtType = TYP_INT;
+ tree->gtLsraInfo.isLocalDefUse = true;
+ }
+ else
+ {
+ assert(tree->gtLsraInfo.dstCount != 0);
+ }
+
+ delayUseSrc = op1;
+ }
+ else if ((op2 != nullptr) &&
+ (!tree->OperIsCommutative() || (op2->isMemoryOp() && (op2->gtLsraInfo.srcCount == 0))))
+ {
+ delayUseSrc = op2;
+ }
+ if (delayUseSrc != nullptr)
+ {
+                // If delayUseSrc is an indirection and it doesn't produce a result, then we need to set 'delayFree'
+ // on the base & index, if any.
+ // Otherwise, we set it on delayUseSrc itself.
+ if (delayUseSrc->isIndir() && (delayUseSrc->gtLsraInfo.dstCount == 0))
+ {
+ GenTree* base = delayUseSrc->AsIndir()->Base();
+ GenTree* index = delayUseSrc->AsIndir()->Index();
+ if (base != nullptr)
+ {
+ base->gtLsraInfo.isDelayFree = true;
+ }
+ if (index != nullptr)
+ {
+ index->gtLsraInfo.isDelayFree = true;
+ }
+ }
+ else
+ {
+ delayUseSrc->gtLsraInfo.isDelayFree = true;
+ }
+ info->hasDelayFreeSrc = true;
+ }
+ }
+ }
+
+ TreeNodeInfoInitCheckByteable(tree);
+
+ // We need to be sure that we've set info->srcCount and info->dstCount appropriately
+ assert((info->dstCount < 2) || (tree->IsMultiRegCall() && info->dstCount == MAX_RET_REG_COUNT));
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCheckByteable: Check the tree to see if "byte-able" registers are
+// required, and set the tree node info accordingly.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCheckByteable(GenTree* tree)
+{
+#ifdef _TARGET_X86_
+ LinearScan* l = m_lsra;
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ // Exclude RBM_NON_BYTE_REGS from dst candidates of tree node and src candidates of operands
+ // if the tree node is a byte type.
+ //
+ // Though this looks conservative in theory, in practice we could not think of a case where
+ // the below logic leads to conservative register specification. In future when or if we find
+ // one such case, this logic needs to be fine tuned for that case(s).
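+    // For reference: x86 has no REX prefix, so only AL/BL/CL/DL are addressable as byte
+    // registers; byte-sized operations must therefore be kept out of the non-byteable
+    // registers (e.g. ESI/EDI), which is what removing RBM_NON_BYTE_REGS accomplishes.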
+
+ if (ExcludeNonByteableRegisters(tree))
+ {
+ regMaskTP regMask;
+ if (info->dstCount > 0)
+ {
+ regMask = info->getDstCandidates(l);
+ assert(regMask != RBM_NONE);
+ info->setDstCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+
+ if (tree->OperIsSimple() && (info->srcCount > 0))
+ {
+ // No need to set src candidates on a contained child operand.
+ GenTree* op = tree->gtOp.gtOp1;
+ assert(op != nullptr);
+ bool containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
+ if (!containedNode)
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+
+ if (tree->OperIsBinary() && (tree->gtOp.gtOp2 != nullptr))
+ {
+ op = tree->gtOp.gtOp2;
+ containedNode = (op->gtLsraInfo.srcCount == 0) && (op->gtLsraInfo.dstCount == 0);
+ if (!containedNode)
+ {
+ regMask = op->gtLsraInfo.getSrcCandidates(l);
+ assert(regMask != RBM_NONE);
+ op->gtLsraInfo.setSrcCandidates(l, regMask & ~RBM_NON_BYTE_REGS);
+ }
+ }
+ }
+ }
+#endif //_TARGET_X86_
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSimple: Sets the srcCount and dstCount for all the trees
+// without special handling based on the tree node type.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitSimple(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ unsigned kind = tree->OperKind();
+ info->dstCount = tree->IsValue() ? 1 : 0;
+ if (kind & (GTK_CONST | GTK_LEAF))
+ {
+ info->srcCount = 0;
+ }
+ else if (kind & (GTK_SMPOP))
+ {
+ if (tree->gtGetOp2IfPresent() != nullptr)
+ {
+ info->srcCount = 2;
+ }
+ else
+ {
+ info->srcCount = 1;
+ }
+ }
+ else
+ {
+ unreached();
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitReturn: Set the NodeInfo for a GT_RETURN.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitReturn(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+#if !defined(_TARGET_64BIT_)
+ if (tree->TypeGet() == TYP_LONG)
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ noway_assert(op1->OperGet() == GT_LONG);
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+ info->srcCount = 2;
+ loVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_LO);
+ hiVal->gtLsraInfo.setSrcCandidates(l, RBM_LNGRET_HI);
+ info->dstCount = 0;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ regMaskTP useCandidates = RBM_NONE;
+
+ info->srcCount = (tree->TypeGet() == TYP_VOID) ? 0 : 1;
+ info->dstCount = 0;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (varTypeIsStruct(tree))
+ {
+ // op1 has to be either an lclvar or a multi-reg returning call
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
+ LclVarDsc* varDsc = &(compiler->lvaTable[lclVarCommon->gtLclNum]);
+ assert(varDsc->lvIsMultiRegRet);
+
+ // Mark var as contained if not enregistrable.
+ if (!varTypeIsEnregisterableStruct(op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ }
+ else
+ {
+ noway_assert(op1->IsMultiRegCall());
+
+ ReturnTypeDesc* retTypeDesc = op1->AsCall()->GetReturnTypeDesc();
+ info->srcCount = retTypeDesc->GetReturnRegCount();
+ useCandidates = retTypeDesc->GetABIReturnRegs();
+ }
+ }
+ else
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ {
+ // Non-struct type return - determine useCandidates
+ switch (tree->TypeGet())
+ {
+ case TYP_VOID:
+ useCandidates = RBM_NONE;
+ break;
+ case TYP_FLOAT:
+ useCandidates = RBM_FLOATRET;
+ break;
+ case TYP_DOUBLE:
+ useCandidates = RBM_DOUBLERET;
+ break;
+#if defined(_TARGET_64BIT_)
+ case TYP_LONG:
+ useCandidates = RBM_LNGRET;
+ break;
+#endif // defined(_TARGET_64BIT_)
+ default:
+ useCandidates = RBM_INTRET;
+ break;
+ }
+ }
+
+ if (useCandidates != RBM_NONE)
+ {
+ op1->gtLsraInfo.setSrcCandidates(l, useCandidates);
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitShiftRotate: Set the NodeInfo for a shift or rotate.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitShiftRotate(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+    // For shift operations, the shift count must end up in CL
+    // whenever the number of bits to shift is not a constant.
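+    // (The variable-count forms of the x86 shift and rotate instructions, e.g. "shl reg, cl",
+    // only take the count in CL, which is why RCX is singled out below.)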
+ GenTreePtr shiftBy = tree->gtOp.gtOp2;
+ GenTreePtr source = tree->gtOp.gtOp1;
+
+#ifdef _TARGET_X86_
+ // The first operand of a GT_LSH_HI and GT_RSH_LO oper is a GT_LONG so that
+ // we can have a three operand form. Increment the srcCount.
+ if (tree->OperGet() == GT_LSH_HI || tree->OperGet() == GT_RSH_LO)
+ {
+ assert(source->OperGet() == GT_LONG);
+
+ info->srcCount++;
+
+ if (tree->OperGet() == GT_LSH_HI)
+ {
+ GenTreePtr sourceLo = source->gtOp.gtOp1;
+ sourceLo->gtLsraInfo.isDelayFree = true;
+ }
+ else
+ {
+ GenTreePtr sourceHi = source->gtOp.gtOp2;
+ sourceHi->gtLsraInfo.isDelayFree = true;
+ }
+
+ source->gtLsraInfo.hasDelayFreeSrc = true;
+ info->hasDelayFreeSrc = true;
+ }
+#endif
+
+    // x64 can encode 8 bits of shift count, but the hardware will only use the low 5 or 6 bits
+    // (the others are masked off).
+    // We will allow whatever can be encoded - hope you know what you are doing.
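+    // For example, "shl r32, 33" actually shifts by 1, because the count is masked to 5 bits
+    // for 32-bit operands (and to 6 bits for 64-bit operands).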
+ if (!IsContainableImmed(tree, shiftBy) || (shiftBy->gtIntConCommon.IconValue() > 255) ||
+ (shiftBy->gtIntConCommon.IconValue() < 0))
+ {
+ source->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ shiftBy->gtLsraInfo.setSrcCandidates(l, RBM_RCX);
+ info->setDstCandidates(l, l->allRegs(TYP_INT) & ~RBM_RCX);
+ }
+ else
+ {
+ MakeSrcContained(tree, shiftBy);
+
+ // Note that Rotate Left/Right instructions don't set ZF and SF flags.
+ //
+ // If the operand being shifted is 32-bits then upper three bits are masked
+ // by hardware to get actual shift count. Similarly for 64-bit operands
+ // shift count is narrowed to [0..63]. If the resulting shift count is zero,
+ // then shift operation won't modify flags.
+ //
+ // TODO-CQ-XARCH: We can optimize generating 'test' instruction for GT_EQ/NE(shift, 0)
+ // if the shift count is known to be non-zero and in the range depending on the
+ // operand size.
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCall: Set the NodeInfo for a call.
+//
+// Arguments:
+// call - The call node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCall(GenTreeCall* call)
+{
+ TreeNodeInfo* info = &(call->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+ bool hasMultiRegRetVal = false;
+ ReturnTypeDesc* retTypeDesc = nullptr;
+
+ info->srcCount = 0;
+ if (call->TypeGet() != TYP_VOID)
+ {
+ hasMultiRegRetVal = call->HasMultiRegRetVal();
+ if (hasMultiRegRetVal)
+ {
+ // dst count = number of registers in which the value is returned by call
+ retTypeDesc = call->GetReturnTypeDesc();
+ info->dstCount = retTypeDesc->GetReturnRegCount();
+ }
+ else
+ {
+ info->dstCount = 1;
+ }
+ }
+ else
+ {
+ info->dstCount = 0;
+ }
+
+ GenTree* ctrlExpr = call->gtControlExpr;
+ if (call->gtCallType == CT_INDIRECT)
+ {
+ // either gtControlExpr != null or gtCallAddr != null.
+ // Both cannot be non-null at the same time.
+ assert(ctrlExpr == nullptr);
+ assert(call->gtCallAddr != nullptr);
+ ctrlExpr = call->gtCallAddr;
+
+#ifdef _TARGET_X86_
+ // Fast tail calls aren't currently supported on x86, but if they ever are, the code
+ // below that handles indirect VSD calls will need to be fixed.
+ assert(!call->IsFastTailCall() || !call->IsVirtualStub());
+#endif // _TARGET_X86_
+ }
+
+ // set reg requirements on call target represented as control sequence.
+ if (ctrlExpr != nullptr)
+ {
+ // we should never see a gtControlExpr whose type is void.
+ assert(ctrlExpr->TypeGet() != TYP_VOID);
+
+        // a call can take an r/m (register or memory) operand on x64
+ info->srcCount++;
+
+        // In the case of a fast tail call implemented as a jmp, make sure that gtControlExpr is
+        // computed into a register.
+ if (!call->IsFastTailCall())
+ {
+#ifdef _TARGET_X86_
+ // On x86, we need to generate a very specific pattern for indirect VSD calls:
+ //
+ // 3-byte nop
+ // call dword ptr [eax]
+ //
+ // Where EAX is also used as an argument to the stub dispatch helper. Make
+ // sure that the call target address is computed into EAX in this case.
+ if (call->IsVirtualStub() && (call->gtCallType == CT_INDIRECT))
+ {
+ assert(ctrlExpr->isIndir());
+
+ ctrlExpr->gtGetOp1()->gtLsraInfo.setSrcCandidates(l, RBM_VIRTUAL_STUB_TARGET);
+ MakeSrcContained(call, ctrlExpr);
+ }
+ else
+#endif // _TARGET_X86_
+ if (ctrlExpr->isIndir())
+ {
+ MakeSrcContained(call, ctrlExpr);
+ }
+ }
+ else
+ {
+ // Fast tail call - make sure that call target is always computed in RAX
+ // so that epilog sequence can generate "jmp rax" to achieve fast tail call.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ }
+ }
+
+ // If this is a varargs call, we will clear the internal candidates in case we need
+ // to reserve some integer registers for copying float args.
+ // We have to do this because otherwise the default candidates are allRegs, and adding
+ // the individual specific registers will have no effect.
+ if (call->IsVarargs())
+ {
+ info->setInternalCandidates(l, RBM_NONE);
+ }
+
+ RegisterType registerType = call->TypeGet();
+
+ // Set destination candidates for return value of the call.
+ CLANG_FORMAT_COMMENT_ANCHOR;
+
+#ifdef _TARGET_X86_
+ if (call->IsHelperCall(compiler, CORINFO_HELP_INIT_PINVOKE_FRAME))
+ {
+ // The x86 CORINFO_HELP_INIT_PINVOKE_FRAME helper uses a custom calling convention that returns with
+ // TCB in REG_PINVOKE_TCB. AMD64/ARM64 use the standard calling convention. fgMorphCall() sets the
+ // correct argument registers.
+ info->setDstCandidates(l, RBM_PINVOKE_TCB);
+ }
+ else
+#endif // _TARGET_X86_
+ if (hasMultiRegRetVal)
+ {
+ assert(retTypeDesc != nullptr);
+ info->setDstCandidates(l, retTypeDesc->GetABIReturnRegs());
+ }
+ else if (varTypeIsFloating(registerType))
+ {
+#ifdef _TARGET_X86_
+ // The return value will be on the X87 stack, and we will need to move it.
+ info->setDstCandidates(l, l->allRegs(registerType));
+#else // !_TARGET_X86_
+ info->setDstCandidates(l, RBM_FLOATRET);
+#endif // !_TARGET_X86_
+ }
+ else if (registerType == TYP_LONG)
+ {
+ info->setDstCandidates(l, RBM_LNGRET);
+ }
+ else
+ {
+ info->setDstCandidates(l, RBM_INTRET);
+ }
+
+ // number of args to a call =
+ // callRegArgs + (callargs - placeholders, setup, etc)
+ // there is an explicit thisPtr but it is redundant
+
+ // If there is an explicit this pointer, we don't want that node to produce anything
+ // as it is redundant
+ if (call->gtCallObjp != nullptr)
+ {
+ GenTreePtr thisPtrNode = call->gtCallObjp;
+
+ if (thisPtrNode->gtOper == GT_PUTARG_REG)
+ {
+ l->clearOperandCounts(thisPtrNode);
+ l->clearDstCount(thisPtrNode->gtOp.gtOp1);
+ }
+ else
+ {
+ l->clearDstCount(thisPtrNode);
+ }
+ }
+
+#if FEATURE_VARARG
+ bool callHasFloatRegArgs = false;
+#endif // FEATURE_VARARG
+
+ // First, count reg args
+ for (GenTreePtr list = call->gtCallLateArgs; list; list = list->MoveNext())
+ {
+ assert(list->OperIsList());
+
+ GenTreePtr argNode = list->Current();
+
+ fgArgTabEntryPtr curArgTabEntry = compiler->gtArgEntryByNode(call, argNode);
+ assert(curArgTabEntry);
+
+ if (curArgTabEntry->regNum == REG_STK)
+ {
+ // late arg that is not passed in a register
+ DISPNODE(argNode);
+ assert(argNode->gtOper == GT_PUTARG_STK);
+ argNode->gtLsraInfo.srcCount = 1;
+ argNode->gtLsraInfo.dstCount = 0;
+
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+ // If the node is TYP_STRUCT and it is put on stack with
+ // putarg_stk operation, we consume and produce no registers.
+ // In this case the embedded Obj node should not produce
+            // registers either, since it is contained.
+ // Note that if it is a SIMD type the argument will be in a register.
+ if (argNode->TypeGet() == TYP_STRUCT)
+ {
+ assert(argNode->gtOp.gtOp1 != nullptr && argNode->gtOp.gtOp1->OperGet() == GT_OBJ);
+ argNode->gtOp.gtOp1->gtLsraInfo.dstCount = 0;
+ argNode->gtLsraInfo.srcCount = 0;
+ }
+#endif // FEATURE_PUT_STRUCT_ARG_STK
+ continue;
+ }
+
+ regNumber argReg = REG_NA;
+ regMaskTP argMask = RBM_NONE;
+ short regCount = 0;
+ bool isOnStack = true;
+ if (curArgTabEntry->regNum != REG_STK)
+ {
+ isOnStack = false;
+ var_types argType = argNode->TypeGet();
+
+#if FEATURE_VARARG
+ callHasFloatRegArgs |= varTypeIsFloating(argType);
+#endif // FEATURE_VARARG
+
+ argReg = curArgTabEntry->regNum;
+ regCount = 1;
+
+ // Default case is that we consume one source; modify this later (e.g. for
+ // promoted structs)
+ info->srcCount++;
+
+ argMask = genRegMask(argReg);
+ argNode = argNode->gtEffectiveVal();
+ }
+
+ // If the struct arg is wrapped in CPYBLK the type of the param will be TYP_VOID.
+        // Use the curArgTabEntry's isStruct to determine whether the param is a struct.
+ if (varTypeIsStruct(argNode) PUT_STRUCT_ARG_STK_ONLY(|| curArgTabEntry->isStruct))
+ {
+ unsigned originalSize = 0;
+ LclVarDsc* varDsc = nullptr;
+ if (argNode->gtOper == GT_LCL_VAR)
+ {
+ varDsc = compiler->lvaTable + argNode->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ }
+ else if (argNode->gtOper == GT_MKREFANY)
+ {
+ originalSize = 2 * TARGET_POINTER_SIZE;
+ }
+ else if (argNode->gtOper == GT_OBJ)
+ {
+ noway_assert(!"GT_OBJ not supported for amd64");
+ }
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ originalSize = genTypeSize(argNode->gtType);
+ }
+ else if (argNode->gtOper == GT_FIELD_LIST)
+ {
+ originalSize = 0;
+
+ // There could be up to 2 PUTARG_REGs in the list
+ GenTreeFieldList* fieldListPtr = argNode->AsFieldList();
+ unsigned iterationNum = 0;
+ for (; fieldListPtr; fieldListPtr = fieldListPtr->Rest())
+ {
+ GenTreePtr putArgRegNode = fieldListPtr->Current();
+ assert(putArgRegNode->gtOper == GT_PUTARG_REG);
+
+ if (iterationNum == 0)
+ {
+ varDsc = compiler->lvaTable + putArgRegNode->gtOp.gtOp1->gtLclVarCommon.gtLclNum;
+ originalSize = varDsc->lvSize();
+ assert(originalSize != 0);
+ }
+ else
+ {
+                        // Need an extra source for every node except the first in the list.
+ info->srcCount++;
+
+ // Get the mask for the second putarg_reg
+ argMask = genRegMask(curArgTabEntry->otherRegNum);
+ }
+
+ putArgRegNode->gtLsraInfo.setDstCandidates(l, argMask);
+ putArgRegNode->gtLsraInfo.setSrcCandidates(l, argMask);
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ putArgRegNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(putArgRegNode));
+ iterationNum++;
+ }
+
+ assert(iterationNum <= CLR_SYSTEMV_MAX_EIGHTBYTES_COUNT_TO_PASS_IN_REGISTERS);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ else
+ {
+ noway_assert(!"Can't predict unsupported TYP_STRUCT arg kind");
+ }
+
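+            // For example, on amd64 a 24-byte struct rounds up to 24 bytes and occupies 3
+            // pointer-sized slots; if it is passed in registers, the first slot goes in argReg
+            // and the remaining slots can use the following argument registers (see below).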
+ unsigned slots = ((unsigned)(roundUp(originalSize, TARGET_POINTER_SIZE))) / REGSIZE_BYTES;
+ unsigned remainingSlots = slots;
+
+ if (!isOnStack)
+ {
+ remainingSlots = slots - 1;
+
+ regNumber reg = (regNumber)(argReg + 1);
+ while (remainingSlots > 0 && reg <= REG_ARG_LAST)
+ {
+ argMask |= genRegMask(reg);
+ reg = (regNumber)(reg + 1);
+ remainingSlots--;
+ regCount++;
+ }
+ }
+
+ short internalIntCount = 0;
+ if (remainingSlots > 0)
+ {
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+                // This TYP_STRUCT argument is also passed in the outgoing argument area.
+                // We need a register to address the TYP_STRUCT.
+ internalIntCount = 1;
+#else // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ // And we may need 2
+ internalIntCount = 2;
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ argNode->gtLsraInfo.internalIntCount = internalIntCount;
+
+#ifdef FEATURE_UNIX_AMD64_STRUCT_PASSING
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+#endif // FEATURE_UNIX_AMD64_STRUCT_PASSING
+ }
+ else
+ {
+ argNode->gtLsraInfo.setDstCandidates(l, argMask);
+ argNode->gtLsraInfo.setSrcCandidates(l, argMask);
+ }
+
+ // To avoid redundant moves, have the argument child tree computed in the
+ // register in which the argument is passed to the call.
+ if (argNode->gtOper == GT_PUTARG_REG)
+ {
+ argNode->gtOp.gtOp1->gtLsraInfo.setSrcCandidates(l, l->getUseCandidates(argNode));
+ }
+
+#if FEATURE_VARARG
+ // In the case of a varargs call, the ABI dictates that if we have floating point args,
+ // we must pass the enregistered arguments in both the integer and floating point registers.
+ // Since the integer register is not associated with this arg node, we will reserve it as
+ // an internal register so that it is not used during the evaluation of the call node
+ // (e.g. for the target).
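+        // For example, under the Windows x64 varargs convention a double passed as the third
+        // argument must be available in both XMM2 and R8.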
+ if (call->IsVarargs() && varTypeIsFloating(argNode))
+ {
+ regNumber targetReg = compiler->getCallArgIntRegister(argReg);
+ info->setInternalIntCount(info->internalIntCount + 1);
+ info->addInternalCandidates(l, genRegMask(targetReg));
+ }
+#endif // FEATURE_VARARG
+ }
+
+ // Now, count stack args
+ // Note that these need to be computed into a register, but then
+ // they're just stored to the stack - so the reg doesn't
+ // need to remain live until the call. In fact, it must not
+ // because the code generator doesn't actually consider it live,
+ // so it can't be spilled.
+
+ GenTreePtr args = call->gtCallArgs;
+ while (args)
+ {
+ GenTreePtr arg = args->gtOp.gtOp1;
+ if (!(args->gtFlags & GTF_LATE_ARG))
+ {
+ TreeNodeInfo* argInfo = &(arg->gtLsraInfo);
+ if (argInfo->dstCount != 0)
+ {
+ argInfo->isLocalDefUse = true;
+ }
+
+ // If the child of GT_PUTARG_STK is a constant, we don't need a register to
+ // move it to memory (stack location).
+ //
+ // On AMD64, we don't want to make 0 contained, because we can generate smaller code
+ // by zeroing a register and then storing it. E.g.:
+ // xor rdx, rdx
+ // mov gword ptr [rsp+28H], rdx
+ // is 2 bytes smaller than:
+ // mov gword ptr [rsp+28H], 0
+ //
+ // On x86, we push stack arguments; we don't use 'mov'. So:
+ // push 0
+ // is 1 byte smaller than:
+ // xor rdx, rdx
+ // push rdx
+
+ argInfo->dstCount = 0;
+ if (arg->gtOper == GT_PUTARG_STK)
+ {
+ GenTree* op1 = arg->gtOp.gtOp1;
+ if (IsContainableImmed(arg, op1)
+#if defined(_TARGET_AMD64_)
+ && !op1->IsIntegralConst(0)
+#endif // _TARGET_AMD64_
+ )
+ {
+ MakeSrcContained(arg, op1);
+ }
+ }
+ }
+ args = args->gtOp.gtOp2;
+ }
+
+#if FEATURE_VARARG
+ // If it is a fast tail call, it is already preferenced to use RAX.
+    // Therefore, there is no need to set src candidates on the call target again.
+ if (call->IsVarargs() && callHasFloatRegArgs && !call->IsFastTailCall() && (ctrlExpr != nullptr))
+ {
+ // Don't assign the call target to any of the argument registers because
+ // we will use them to also pass floating point arguments as required
+ // by Amd64 ABI.
+ ctrlExpr->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_ARG_REGS));
+ }
+#endif // FEATURE_VARARG
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitBlockStore: Set the NodeInfo for a block store.
+//
+// Arguments:
+// blkNode - The block store node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitBlockStore(GenTreeBlk* blkNode)
+{
+ GenTree* dstAddr = blkNode->Addr();
+ unsigned size = blkNode->gtBlkSize;
+ GenTree* source = blkNode->Data();
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ // Sources are dest address, initVal or source.
+ // We may require an additional source or temp register for the size.
+ blkNode->gtLsraInfo.srcCount = 2;
+ blkNode->gtLsraInfo.dstCount = 0;
+ blkNode->gtLsraInfo.setInternalCandidates(l, RBM_NONE);
+ GenTreePtr srcAddrOrFill = nullptr;
+ bool isInitBlk = blkNode->OperIsInitBlkOp();
+
+ regMaskTP dstAddrRegMask = RBM_NONE;
+ regMaskTP sourceRegMask = RBM_NONE;
+ regMaskTP blkSizeRegMask = RBM_NONE;
+
+ if (isInitBlk)
+ {
+ GenTree* initVal = source;
+ if (initVal->OperIsInitVal())
+ {
+ initVal = initVal->gtGetOp1();
+ }
+ srcAddrOrFill = initVal;
+
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindUnroll:
+ assert(initVal->IsCnsIntOrI());
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // Reserve an XMM register to fill it with
+ // a pack of 16 init value constants.
+ ssize_t fill = initVal->gtIntCon.gtIconVal & 0xFF;
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.setInternalCandidates(l, l->internalFloatRegCandidates());
+ if ((fill == 0) && ((size & 0xf) == 0))
+ {
+ MakeSrcContained(blkNode, source);
+ }
+                    // We use an XMM register to fill with constants; this may use AVX instructions, so set the flag.
+ SetContainsAVXFlags();
+ }
+#ifdef _TARGET_X86_
+ if ((size & 1) != 0)
+ {
+ // On x86, you can't address the lower byte of ESI, EDI, ESP, or EBP when doing
+ // a "mov byte ptr [dest], val". If the fill size is odd, we will try to do this
+ // when unrolling, so only allow byteable registers as the source value. (We could
+ // consider just using BlkOpKindRepInstr instead.)
+ sourceRegMask = RBM_BYTE_REGS;
+ }
+#endif // _TARGET_X86_
+ break;
+
+ case GenTreeBlk::BlkOpKindRepInstr:
+ // rep stos has the following register requirements:
+ // a) The memory address to be in RDI.
+ // b) The fill value has to be in RAX.
+ // c) The buffer size will go in RCX.
+ dstAddrRegMask = RBM_RDI;
+ srcAddrOrFill = initVal;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+ break;
+
+ case GenTreeBlk::BlkOpKindHelper:
+#ifdef _TARGET_AMD64_
+ // The helper follows the regular AMD64 ABI.
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
+#else // !_TARGET_AMD64_
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+#endif // !_TARGET_AMD64_
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ else
+ {
+ // CopyObj or CopyBlk
+ if (source->gtOper == GT_IND)
+ {
+ srcAddrOrFill = blkNode->Data()->gtGetOp1();
+ // We're effectively setting source as contained, but can't call MakeSrcContained, because the
+ // "inheritance" of the srcCount is to a child not a parent - it would "just work" but could be misleading.
+ // If srcAddr is already non-contained, we don't need to change it.
+ if (srcAddrOrFill->gtLsraInfo.getDstCount() == 0)
+ {
+ srcAddrOrFill->gtLsraInfo.setDstCount(1);
+ srcAddrOrFill->gtLsraInfo.setSrcCount(source->gtLsraInfo.srcCount);
+ }
+ m_lsra->clearOperandCounts(source);
+ }
+ else if (!source->IsMultiRegCall() && !source->OperIsSIMD())
+ {
+ assert(source->IsLocal());
+ MakeSrcContained(blkNode, source);
+ }
+ if (blkNode->OperGet() == GT_STORE_OBJ)
+ {
+ if (blkNode->gtBlkOpKind == GenTreeBlk::BlkOpKindRepInstr)
+ {
+ // We need the size of the contiguous Non-GC-region to be in RCX to call rep movsq.
+ blkSizeRegMask = RBM_RCX;
+ }
+ // The srcAddr must be in a register. If it was under a GT_IND, we need to subsume all of its
+ // sources.
+ sourceRegMask = RBM_RSI;
+ dstAddrRegMask = RBM_RDI;
+ }
+ else
+ {
+ switch (blkNode->gtBlkOpKind)
+ {
+ case GenTreeBlk::BlkOpKindUnroll:
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
+ if ((size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ blkNode->gtLsraInfo.internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size & 1) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ blkNode->gtLsraInfo.setInternalCandidates(l, regMask);
+ }
+
+ if (size >= XMM_REGSIZE_BYTES)
+ {
+ // If we have a buffer larger than XMM_REGSIZE_BYTES,
+ // reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ blkNode->gtLsraInfo.internalFloatCount = 1;
+ blkNode->gtLsraInfo.addInternalCandidates(l, l->internalFloatRegCandidates());
+ // An XMM reg is used for the loads and stores, so check whether AVX instructions
+ // are used for codegen and set the ContainsAVX flag.
+ SetContainsAVXFlags();
+ }
+ // If src or dst are on stack, we don't have to generate the address
+ // into a register because it's just some constant+SP.
+ if ((srcAddrOrFill != nullptr) && srcAddrOrFill->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, srcAddrOrFill);
+ }
+
+ if (dstAddr->OperIsLocalAddr())
+ {
+ MakeSrcContained(blkNode, dstAddr);
+ }
+
+ break;
+
+ case GenTreeBlk::BlkOpKindRepInstr:
+ // rep movs has the following register requirements:
+ // a) The dest address has to be in RDI.
+ // b) The src address has to be in RSI.
+ // c) The buffer size will go in RCX.
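+ // Illustrative (assumed) shape of the emitted sequence:
+ //     mov rdi, dstAddr
+ //     mov rsi, srcAddr
+ //     mov rcx, byteCount
+ //     rep movsb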
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RSI;
+ blkSizeRegMask = RBM_RCX;
+ break;
+
+ case GenTreeBlk::BlkOpKindHelper:
+#ifdef _TARGET_AMD64_
+ // The helper follows the regular AMD64 ABI.
+ dstAddrRegMask = RBM_ARG_0;
+ sourceRegMask = RBM_ARG_1;
+ blkSizeRegMask = RBM_ARG_2;
+#else // !_TARGET_AMD64_
+ dstAddrRegMask = RBM_RDI;
+ sourceRegMask = RBM_RAX;
+ blkSizeRegMask = RBM_RCX;
+#endif // !_TARGET_AMD64_
+ break;
+
+ default:
+ unreached();
+ }
+ }
+ }
+
+ if (dstAddrRegMask != RBM_NONE)
+ {
+ dstAddr->gtLsraInfo.setSrcCandidates(l, dstAddrRegMask);
+ }
+ if (sourceRegMask != RBM_NONE)
+ {
+ if (srcAddrOrFill != nullptr)
+ {
+ srcAddrOrFill->gtLsraInfo.setSrcCandidates(l, sourceRegMask);
+ }
+ else
+ {
+ // This is a local source; we'll use a temp register for its address.
+ blkNode->gtLsraInfo.addInternalCandidates(l, sourceRegMask);
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+ }
+ if (blkSizeRegMask != RBM_NONE)
+ {
+ if (size != 0)
+ {
+ // Reserve a temp register for the block size argument.
+ blkNode->gtLsraInfo.addInternalCandidates(l, blkSizeRegMask);
+ blkNode->gtLsraInfo.internalIntCount++;
+ }
+ else
+ {
+ // The block size argument is the third argument to GT_STORE_DYN_BLK
+ noway_assert(blkNode->gtOper == GT_STORE_DYN_BLK);
+ blkNode->gtLsraInfo.setSrcCount(3);
+ GenTree* blockSize = blkNode->AsDynBlk()->gtDynamicSize;
+ blockSize->gtLsraInfo.setSrcCandidates(l, blkSizeRegMask);
+ }
+ }
+}
+
+#ifdef FEATURE_PUT_STRUCT_ARG_STK
+//------------------------------------------------------------------------
+// TreeNodeInfoInitPutArgStk: Set the NodeInfo for a GT_PUTARG_STK.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitPutArgStk(GenTreePutArgStk* putArgStk)
+{
+ TreeNodeInfo* info = &(putArgStk->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ info->srcCount = 0;
+
+#ifdef _TARGET_X86_
+ if (putArgStk->gtOp1->gtOper == GT_FIELD_LIST)
+ {
+ unsigned fieldCount = 0;
+ bool needsByteTemp = false;
+ bool needsSimdTemp = false;
+ unsigned prevOffset = putArgStk->getArgSize();
+ for (GenTreeFieldList* current = putArgStk->gtOp1->AsFieldList(); current != nullptr; current = current->Rest())
+ {
+ GenTree* const fieldNode = current->Current();
+ const var_types fieldType = fieldNode->TypeGet();
+ const unsigned fieldOffset = current->gtFieldOffset;
+ assert(fieldType != TYP_LONG);
+ info->srcCount++;
+
+ // For x86 we must mark all integral fields as contained or reg-optional, and handle them
+ // accordingly in code generation, since we may have up to 8 fields, which cannot all be in
+ // registers to be consumed atomically by the call.
+ if (varTypeIsIntegralOrI(fieldNode))
+ {
+ if (fieldNode->OperGet() == GT_LCL_VAR)
+ {
+ LclVarDsc* varDsc = &(comp->lvaTable[fieldNode->AsLclVarCommon()->gtLclNum]);
+ if (varDsc->lvTracked && !varDsc->lvDoNotEnregister)
+ {
+ SetRegOptional(fieldNode);
+ }
+ else
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ }
+ else if (fieldNode->IsIntCnsFitsInI32())
+ {
+ MakeSrcContained(putArgStk, fieldNode);
+ }
+ else
+ {
+ // For the case where we cannot directly push the value, if we run out of registers,
+ // it would be better to defer computation until we are pushing the arguments rather
+ // than spilling, but this situation is not all that common, as most cases of promoted
+ // structs do not have a large number of fields, and of those most are lclVars or
+ // copy-propagated constants.
+ SetRegOptional(fieldNode);
+ }
+ }
+#if defined(FEATURE_SIMD)
+ // Note that we need to check the GT_FIELD_LIST type, not the fieldType. This is because the
+ // GT_FIELD_LIST will be TYP_SIMD12 whereas the fieldType might be TYP_SIMD16 for lclVar, where
+ // we "round up" to 16.
+ else if (current->gtFieldType == TYP_SIMD12)
+ {
+ needsSimdTemp = true;
+ }
+#endif // defined(FEATURE_SIMD)
+ else
+ {
+ assert(varTypeIsFloating(fieldNode) || varTypeIsSIMD(fieldNode));
+ }
+
+ // We can treat as a slot any field that is stored at a slot boundary, where the previous
+ // field is not in the same slot. (Note that we store the fields in reverse order.)
+ const bool fieldIsSlot = ((fieldOffset % 4) == 0) && ((prevOffset - fieldOffset) >= 4);
+ if (!fieldIsSlot)
+ {
+ if (varTypeIsByte(fieldType))
+ {
+ // If this field is a slot--i.e. it is an integer field that is 4-byte aligned and takes up 4 bytes
+ // (including padding)--we can store the whole value rather than just the byte. Otherwise, we will
+ // need a byte-addressable register for the store. We will enforce this requirement on an internal
+ // register, which we can use to copy multiple byte values.
+ needsByteTemp = true;
+ }
+ }
+
+ if (varTypeIsGC(fieldType))
+ {
+ putArgStk->gtNumberReferenceSlots++;
+ }
+ prevOffset = fieldOffset;
+ fieldCount++;
+ }
+
+ info->dstCount = 0;
+
+ if (putArgStk->gtPutArgStkKind == GenTreePutArgStk::Kind::Push)
+ {
+ // If any of the fields cannot be stored with an actual push, we may need a temporary
+ // register to load the value before storing it to the stack location.
+ info->internalIntCount = 1;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+ if (needsByteTemp)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+ info->setInternalCandidates(l, regMask);
+ }
+
+#if defined(FEATURE_SIMD)
+ // For PutArgStk of a TYP_SIMD12, we need a SIMD temp register.
+ if (needsSimdTemp)
+ {
+ info->internalFloatCount += 1;
+ info->addInternalCandidates(l, l->allSIMDRegs());
+ }
+#endif // defined(FEATURE_SIMD)
+
+ return;
+ }
+#endif // _TARGET_X86_
+
+#if defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+ // For PutArgStk of a TYP_SIMD12, we need an extra register.
+ if (putArgStk->TypeGet() == TYP_SIMD12)
+ {
+ info->srcCount = putArgStk->gtOp1->gtLsraInfo.dstCount;
+ info->dstCount = 0;
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->allSIMDRegs());
+ return;
+ }
+#endif // defined(FEATURE_SIMD) && defined(_TARGET_X86_)
+
+ if (putArgStk->TypeGet() != TYP_STRUCT)
+ {
+ TreeNodeInfoInitSimple(putArgStk);
+ return;
+ }
+
+ GenTreePtr dst = putArgStk;
+ GenTreePtr src = putArgStk->gtOp1;
+ GenTreePtr srcAddr = nullptr;
+
+ bool haveLocalAddr = false;
+ if ((src->OperGet() == GT_OBJ) || (src->OperGet() == GT_IND))
+ {
+ srcAddr = src->gtOp.gtOp1;
+ assert(srcAddr != nullptr);
+ haveLocalAddr = srcAddr->OperIsLocalAddr();
+ }
+ else
+ {
+ assert(varTypeIsSIMD(putArgStk));
+ }
+
+ info->srcCount = src->gtLsraInfo.dstCount;
+ info->dstCount = 0;
+
+ // If we have a buffer between XMM_REGSIZE_BYTES and CPBLK_UNROLL_LIMIT bytes, we'll use SSE2.
+ // Structs and buffers with sizes <= CPBLK_UNROLL_LIMIT bytes occur in more than 95% of cases in
+ // our framework assemblies, so this is the main code generation scheme we'll use.
+ ssize_t size = putArgStk->gtNumSlots * TARGET_POINTER_SIZE;
+ switch (putArgStk->gtPutArgStkKind)
+ {
+ case GenTreePutArgStk::Kind::Push:
+ case GenTreePutArgStk::Kind::PushAllSlots:
+ case GenTreePutArgStk::Kind::Unroll:
+ // If we have a remainder smaller than XMM_REGSIZE_BYTES, we need an integer temp reg.
+ //
+ // x86 specific note: if the size is odd, the last copy operation would be of size 1 byte.
+ // But on x86 only RBM_BYTE_REGS could be used as byte registers. Therefore, exclude
+ // RBM_NON_BYTE_REGS from internal candidates.
+ if ((putArgStk->gtNumberReferenceSlots == 0) && (size & (XMM_REGSIZE_BYTES - 1)) != 0)
+ {
+ info->internalIntCount++;
+ regMaskTP regMask = l->allRegs(TYP_INT);
+
+#ifdef _TARGET_X86_
+ if ((size % 2) != 0)
+ {
+ regMask &= ~RBM_NON_BYTE_REGS;
+ }
+#endif
+ info->setInternalCandidates(l, regMask);
+ }
+
+#ifdef _TARGET_X86_
+ if (size >= 8)
+#else // !_TARGET_X86_
+ if (size >= XMM_REGSIZE_BYTES)
+#endif // !_TARGET_X86_
+ {
+ // If we have a buffer larger than or equal to XMM_REGSIZE_BYTES on x64/ux,
+ // or larger than or equal to 8 bytes on x86, reserve an XMM register to use it for a
+ // series of 16-byte loads and stores.
+ info->internalFloatCount = 1;
+ info->addInternalCandidates(l, l->internalFloatRegCandidates());
+ SetContainsAVXFlags();
+ }
+ break;
+
+ case GenTreePutArgStk::Kind::RepInstr:
+ info->internalIntCount += 3;
+ info->setInternalCandidates(l, (RBM_RDI | RBM_RCX | RBM_RSI));
+ break;
+
+ default:
+ unreached();
+ }
+
+ // Always mark the OBJ and ADDR as contained trees by the putarg_stk. The codegen will deal with this tree.
+ MakeSrcContained(putArgStk, src);
+
+ if (haveLocalAddr)
+ {
+ // If the source address is the address of a lclVar, make the source address contained to avoid unnecessary
+ // copies.
+ //
+ // To avoid an assertion in MakeSrcContained, increment the parent's source count beforehand and decrement it
+ // afterwards.
+ info->srcCount++;
+ MakeSrcContained(putArgStk, srcAddr);
+ info->srcCount--;
+ }
+}
+#endif // FEATURE_PUT_STRUCT_ARG_STK
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitLclHeap: Set the NodeInfo for a GT_LCLHEAP.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitLclHeap(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+ Compiler* compiler = comp;
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Need a variable number of temp regs (see genLclHeap() in codegenamd64.cpp):
+ // Here '-' means don't care.
+ //
+ // Size? Init Memory? # temp regs
+ // 0 - 0 (returns 0)
+ // const and <=6 reg words - 0 (pushes '0')
+ // const and >6 reg words Yes 0 (pushes '0')
+ // const and <PageSize No 0 (amd64) 1 (x86)
+ // (x86: tmpReg for subtracting from esp)
+ // const and >=PageSize No 2 (regCnt and tmpReg for subtracting from sp)
+ // Non-const Yes 0 (regCnt=targetReg and pushes '0')
+ // Non-const No 2 (regCnt and tmpReg for subtracting from sp)
+ //
+ // Note: Here we don't need internal register to be different from targetReg.
+ // Rather, require it to be different from operand's reg.
+
+ GenTreePtr size = tree->gtOp.gtOp1;
+ if (size->IsCnsIntOrI())
+ {
+ MakeSrcContained(tree, size);
+
+ size_t sizeVal = size->gtIntCon.gtIconVal;
+
+ if (sizeVal == 0)
+ {
+ info->internalIntCount = 0;
+ }
+ else
+ {
+ // Compute the amount of memory to properly STACK_ALIGN.
+ // Note: The Gentree node is not updated here as it is cheap to recompute stack aligned size.
+ // This should also help in debugging as we can examine the original size specified with localloc.
+ sizeVal = AlignUp(sizeVal, STACK_ALIGN);
+
+ // For small allocations up to 6 pointer sized words (i.e. 48 bytes of localloc)
+ // we will generate 'push 0'.
+ assert((sizeVal % REGSIZE_BYTES) == 0);
+ size_t cntRegSizedWords = sizeVal / REGSIZE_BYTES;
+ if (cntRegSizedWords <= 6)
+ {
+ info->internalIntCount = 0;
+ }
+ else if (!compiler->info.compInitMem)
+ {
+ // No need to initialize allocated stack space.
+ if (sizeVal < compiler->eeGetPageSize())
+ {
+#ifdef _TARGET_X86_
+ info->internalIntCount = 1; // x86 needs a register here to avoid generating "sub" on ESP.
+#else // !_TARGET_X86_
+ info->internalIntCount = 0;
+#endif // !_TARGET_X86_
+ }
+ else
+ {
+ // We need two registers: regCnt and RegTmp
+ info->internalIntCount = 2;
+ }
+ }
+ else
+ {
+ // >6 and need to zero initialize allocated stack space.
+ info->internalIntCount = 0;
+ }
+ }
+ }
+ else
+ {
+ if (!compiler->info.compInitMem)
+ {
+ info->internalIntCount = 2;
+ }
+ else
+ {
+ info->internalIntCount = 0;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitLogicalOp: Set the NodeInfo for GT_AND/GT_OR/GT_XOR,
+// as well as GT_ADD/GT_SUB.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitLogicalOp(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ // We're not marking a constant hanging on the left of the add as containable,
+ // so it gets assigned to a register, which has a CQ impact.
+ // TODO-XArch-CQ: Detect this case and generate a single instruction for
+ // GT_ADD(Constant, SomeTree).
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // We can directly encode the second operand if it is either a containable constant or a memory-op.
+ // In case of memory-op, we can encode it directly provided its type matches with 'tree' type.
+ // This is because during codegen, type of 'tree' is used to determine emit Type size. If the types
+ // do not match, they get normalized (i.e. sign/zero extended) on load into a register.
+ bool directlyEncodable = false;
+ bool binOpInRMW = false;
+ GenTreePtr operand = nullptr;
+
+ if (IsContainableImmed(tree, op2))
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else
+ {
+ binOpInRMW = IsBinOpInRMWStoreInd(tree);
+ if (!binOpInRMW)
+ {
+ if (op2->isMemoryOp() && tree->TypeGet() == op2->TypeGet())
+ {
+ directlyEncodable = true;
+ operand = op2;
+ }
+ else if (tree->OperIsCommutative())
+ {
+ if (IsContainableImmed(tree, op1) ||
+ (op1->isMemoryOp() && tree->TypeGet() == op1->TypeGet() && IsSafeToContainMem(tree, op1)))
+ {
+ // If it is safe, we can reverse the order of operands of commutative operations for efficient
+ // codegen
+ directlyEncodable = true;
+ operand = op1;
+ }
+ }
+ }
+ }
+
+ if (directlyEncodable)
+ {
+ assert(operand != nullptr);
+ MakeSrcContained(tree, operand);
+ }
+ else if (!binOpInRMW)
+ {
+ // If this binary op neither has contained operands, nor is a
+ // Read-Modify-Write (RMW) operation, we can mark its operands
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+
+ // Codegen of this tree node sets ZF and SF flags.
+ tree->gtFlags |= GTF_ZSF_SET;
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitModDiv: Set the NodeInfo for GT_MOD/GT_DIV/GT_UMOD/GT_UDIV.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitModDiv(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ switch (tree->OperGet())
+ {
+ case GT_MOD:
+ case GT_DIV:
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ // No implicit conversions at this stage as the expectation is that
+ // everything is made explicit by adding casts.
+ assert(op1->TypeGet() == op2->TypeGet());
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ // SSE2 allows only op2 to be a memory-op.
+ SetRegOptional(op2);
+ }
+
+ return;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ // Amd64 Div/Idiv instruction:
+ // Dividend in RDX:RAX; computes
+ // Quotient in RAX, Remainder in RDX
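+ // Illustrative (assumed) sequence for a 32-bit signed divide with a memory divisor:
+ //     mov eax, dividend
+ //     cdq
+ //     idiv dword ptr [divisor]   ; quotient in EAX, remainder in EDX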
+
+ if (tree->OperGet() == GT_MOD || tree->OperGet() == GT_UMOD)
+ {
+ // We are interested in just the remainder.
+ // RAX is used as a trashable register during computation of remainder.
+ info->setDstCandidates(l, RBM_RDX);
+ }
+ else
+ {
+ // We are interested in just the quotient.
+ // RDX is used as a trashable register during computation of the quotient.
+ info->setDstCandidates(l, RBM_RAX);
+ }
+
+ bool op2CanBeRegOptional = true;
+#ifdef _TARGET_X86_
+ if (op1->OperGet() == GT_LONG)
+ {
+ // To avoid a register move, we would like to have op1's low part in RAX and its high part in RDX.
+ GenTree* loVal = op1->gtGetOp1();
+ GenTree* hiVal = op1->gtGetOp2();
+
+ // Src count is actually 3, so increment.
+ assert(op2->IsCnsIntOrI());
+ assert(tree->OperGet() == GT_UMOD);
+ info->srcCount++;
+ op2CanBeRegOptional = false;
+
+ // This situation also requires an internal register.
+ info->internalIntCount = 1;
+ info->setInternalCandidates(l, l->allRegs(TYP_INT));
+
+ loVal->gtLsraInfo.setSrcCandidates(l, RBM_EAX);
+ hiVal->gtLsraInfo.setSrcCandidates(l, RBM_EDX);
+ }
+ else
+#endif
+ {
+ // If possible, we would like to have op1 in RAX to avoid a register move.
+ op1->gtLsraInfo.setSrcCandidates(l, RBM_RAX);
+ }
+
+ // divisor can be an r/m, but the memory indirection must be of the same size as the divide
+ if (op2->isMemoryOp() && (op2->TypeGet() == tree->TypeGet()))
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op2CanBeRegOptional)
+ {
+ op2->gtLsraInfo.setSrcCandidates(l, l->allRegs(TYP_INT) & ~(RBM_RAX | RBM_RDX));
+
+ // If there are no containable operands, we can make an operand reg optional.
+ // Div instruction allows only op2 to be a memory op.
+ SetRegOptional(op2);
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitIntrinsic: Set the NodeInfo for a GT_INTRINSIC.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitIntrinsic(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* l = m_lsra;
+
+ // Both operand and its result must be of floating point type.
+ GenTree* op1 = tree->gtGetOp1();
+ assert(varTypeIsFloating(op1));
+ assert(op1->TypeGet() == tree->TypeGet());
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ switch (tree->gtIntrinsic.gtIntrinsicId)
+ {
+ case CORINFO_INTRINSIC_Sqrt:
+ if (op1->isMemoryOp() || op1->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // Mark the operand as reg optional since codegen can still
+ // generate code if op1 is on stack.
+ SetRegOptional(op1);
+ }
+ break;
+
+ case CORINFO_INTRINSIC_Abs:
+ // Abs(float x) = x & 0x7fffffff
+ // Abs(double x) = x & 0x7fffffff ffffffff
+
+ // In case of Abs we need an internal register to hold mask.
+
+ // TODO-XArch-CQ: avoid using an internal register for the mask.
+ // Andps or andpd both will operate on 128-bit operands.
+ // The data section constant to hold the mask is a 64-bit size.
+ // Therefore, we need both the operand and mask to be in
+ // xmm register. When we add support in emitter to emit 128-bit
+ // data constants and instructions that operate on 128-bit
+ // memory operands we can avoid the need for an internal register.
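+ // Illustrative (assumed) sequence using the internal xmm register:
+ //     movsd xmm1, qword ptr [absMask]   ; load the 64-bit mask constant
+ //     andpd xmm0, xmm1                  ; clear the sign bit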
+ if (tree->gtIntrinsic.gtIntrinsicId == CORINFO_INTRINSIC_Abs)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(l, l->internalFloatRegCandidates());
+ }
+ break;
+
+#ifdef _TARGET_X86_
+ case CORINFO_INTRINSIC_Cos:
+ case CORINFO_INTRINSIC_Sin:
+ case CORINFO_INTRINSIC_Round:
+ NYI_X86("Math intrinsics Cos, Sin and Round");
+ break;
+#endif // _TARGET_X86_
+
+ default:
+ // Right now only Sqrt/Abs are treated as math intrinsics
+ noway_assert(!"Unsupported math intrinsic");
+ unreached();
+ break;
+ }
+}
+
+#ifdef FEATURE_SIMD
+//------------------------------------------------------------------------
+// TreeNodeInfoInitSIMD: Set the NodeInfo for a GT_SIMD tree.
+//
+// Arguments:
+// tree - The GT_SIMD node of interest
+//
+// Return Value:
+// None.
+
+void Lowering::TreeNodeInfoInitSIMD(GenTree* tree)
+{
+ GenTreeSIMD* simdTree = tree->AsSIMD();
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+ LinearScan* lsra = m_lsra;
+ info->dstCount = 1;
+ SetContainsAVXFlags(true, simdTree->gtSIMDSize);
+ switch (simdTree->gtSIMDIntrinsicID)
+ {
+ GenTree* op1;
+ GenTree* op2;
+
+ case SIMDIntrinsicInit:
+ {
+ op1 = tree->gtOp.gtOp1;
+
+#if !defined(_TARGET_64BIT_)
+ if (op1->OperGet() == GT_LONG)
+ {
+ info->srcCount = 2;
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ {
+ info->srcCount = 1;
+ }
+
+ // This sets all fields of a SIMD struct to the given value.
+ // Mark op1 as contained if it is either zero or int constant of all 1's,
+ // or a float constant with 16 or 32 byte simdType (AVX case)
+ //
+ // Should never see small int base type vectors except for zero initialization.
+ assert(!varTypeIsSmallInt(simdTree->gtSIMDBaseType) || op1->IsIntegralConst(0));
+
+#if !defined(_TARGET_64BIT_)
+ if (op1->OperGet() == GT_LONG)
+ {
+ GenTree* op1lo = op1->gtGetOp1();
+ GenTree* op1hi = op1->gtGetOp2();
+
+ if ((op1lo->IsIntegralConst(0) && op1hi->IsIntegralConst(0)) ||
+ (op1lo->IsIntegralConst(-1) && op1hi->IsIntegralConst(-1)))
+ {
+ assert(op1->gtLsraInfo.srcCount == 0);
+ assert(op1->gtLsraInfo.dstCount == 0);
+ assert(op1lo->gtLsraInfo.srcCount == 0);
+ assert(op1lo->gtLsraInfo.dstCount == 1);
+ assert(op1hi->gtLsraInfo.srcCount == 0);
+ assert(op1hi->gtLsraInfo.dstCount == 1);
+
+ op1lo->gtLsraInfo.dstCount = 0;
+ op1hi->gtLsraInfo.dstCount = 0;
+ info->srcCount = 0;
+ }
+ else
+ {
+ // need a temp
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ info->isInternalRegDelayFree = true;
+ }
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ if (op1->IsFPZero() || op1->IsIntegralConst(0) ||
+ (varTypeIsIntegral(simdTree->gtSIMDBaseType) && op1->IsIntegralConst(-1)))
+ {
+ MakeSrcContained(tree, op1);
+ info->srcCount = 0;
+ }
+ else if ((comp->getSIMDInstructionSet() == InstructionSet_AVX) &&
+ ((simdTree->gtSIMDSize == 16) || (simdTree->gtSIMDSize == 32)))
+ {
+ // Either op1 is a float or dbl constant or an addr
+ if (op1->IsCnsFltOrDbl() || op1->OperIsLocalAddr())
+ {
+ MakeSrcContained(tree, op1);
+ info->srcCount = 0;
+ }
+ }
+ }
+ break;
+
+ case SIMDIntrinsicInitN:
+ {
+ info->srcCount = (short)(simdTree->gtSIMDSize / genTypeSize(simdTree->gtSIMDBaseType));
+
+ // Need an internal register to stitch together all the values into a single vector in a SIMD reg.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicInitArray:
+ // We have an array and an index, which may be contained.
+ info->srcCount = 2;
+ CheckImmedAndMakeContained(tree, tree->gtGetOp2());
+ break;
+
+ case SIMDIntrinsicDiv:
+ // SSE2 has no instruction support for division on integer vectors
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicAbs:
+ // float/double vectors: This gets implemented as a bitwise-And operation
+ // with a mask and hence should never be seen here.
+ //
+ // Must be a Vector<int>, Vector<short> or Vector<sbyte>
+ assert(simdTree->gtSIMDBaseType == TYP_INT || simdTree->gtSIMDBaseType == TYP_SHORT ||
+ simdTree->gtSIMDBaseType == TYP_BYTE);
+ assert(comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicSqrt:
+ // SSE2 has no instruction support for sqrt on integer vectors.
+ noway_assert(varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicAdd:
+ case SIMDIntrinsicSub:
+ case SIMDIntrinsicMul:
+ case SIMDIntrinsicBitwiseAnd:
+ case SIMDIntrinsicBitwiseAndNot:
+ case SIMDIntrinsicBitwiseOr:
+ case SIMDIntrinsicBitwiseXor:
+ case SIMDIntrinsicMin:
+ case SIMDIntrinsicMax:
+ info->srcCount = 2;
+
+ // SSE2 32-bit integer multiplication requires two temp regs
+ if (simdTree->gtSIMDIntrinsicID == SIMDIntrinsicMul && simdTree->gtSIMDBaseType == TYP_INT &&
+ comp->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ info->internalFloatCount = 2;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicEqual:
+ info->srcCount = 2;
+ break;
+
+ // SSE2 doesn't support < and <= directly on int vectors.
+ // Instead we need to use > and >= with swapped operands.
+ case SIMDIntrinsicLessThan:
+ case SIMDIntrinsicLessThanOrEqual:
+ info->srcCount = 2;
+ noway_assert(!varTypeIsIntegral(simdTree->gtSIMDBaseType));
+ break;
+
+ // SIMDIntrinsicGreaterThan is supported only on non-floating point base type vectors.
+ // SSE2 cmpps/pd doesn't support > and >= directly on float/double vectors.
+ // Instead we need to use < and <= with swapped operands.
+ case SIMDIntrinsicGreaterThan:
+ noway_assert(!varTypeIsFloating(simdTree->gtSIMDBaseType));
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ info->srcCount = 2;
+
+ // On SSE4/AVX, we can generate optimal code for (in)equality
+ // against zero using ptest. We can safely do this optimization
+ // for integral vectors but not for floating-point vectors, because
+ // +0.0 == -0.0 even though their bit patterns differ, so a bitwise test is not a valid equality check.
+ op2 = tree->gtGetOp2();
+ if ((comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4) && op2->IsIntegralConstVector(0))
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else
+ {
+ // Need one SIMD register as scratch.
+ // See genSIMDIntrinsicRelOp() for details on code sequence generated and
+ // the need for one scratch register.
+ //
+ // Note these intrinsics produce a BOOL result, hence internal float
+ // registers reserved are guaranteed to be different from target
+ // integer register without explicitly specifying.
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ break;
+
+ case SIMDIntrinsicDotProduct:
+ // Float/Double vectors:
+ // For SSE, or AVX with 32-byte vectors, we also need an internal register
+ // as scratch. Further we need the targetReg and internal reg to be distinct
+ // registers. Note that if this is a TYP_SIMD16 or smaller on AVX, then we
+ // don't need a tmpReg.
+ //
+ // 32-byte integer vector on SSE4/AVX:
+ // will take advantage of phaddd, which operates only on 128-bit xmm reg.
+ // This will need 1 (in case of SSE4) or 2 (in case of AVX) internal
+ // registers since targetReg is an int type register.
+ //
+ // See genSIMDIntrinsicDotProduct() for details on code sequence generated
+ // and the need for scratch registers.
+ if (varTypeIsFloating(simdTree->gtSIMDBaseType))
+ {
+ if ((comp->getSIMDInstructionSet() == InstructionSet_SSE2) ||
+ (simdTree->gtOp.gtOp1->TypeGet() == TYP_SIMD32))
+ {
+ info->internalFloatCount = 1;
+ info->isInternalRegDelayFree = true;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ // else don't need scratch reg(s).
+ }
+ else
+ {
+ assert(simdTree->gtSIMDBaseType == TYP_INT && comp->getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+
+ // No need to set isInternalRegDelayFree since targetReg is
+ // an int type reg and guaranteed to be different from xmm/ymm
+ // regs.
+ info->internalFloatCount = comp->canUseAVX() ? 2 : 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ info->srcCount = 2;
+ break;
+
+ case SIMDIntrinsicGetItem:
+ {
+ // This implements get_Item method. The sources are:
+ // - the source SIMD struct
+ // - index (which element to get)
+ // The result is baseType of SIMD struct.
+ info->srcCount = 2;
+ op1 = tree->gtOp.gtOp1;
+ op2 = tree->gtOp.gtOp2;
+
+ // If the index is a constant, mark it as contained.
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ info->srcCount = 1;
+ }
+
+ if (op1->isMemoryOp())
+ {
+ MakeSrcContained(tree, op1);
+
+ // Although GT_IND of TYP_SIMD12 reserves an internal float
+ // register for reading 4 and 8 bytes from memory and
+ // assembling them into target XMM reg, it is not required
+ // in this case.
+ op1->gtLsraInfo.internalIntCount = 0;
+ op1->gtLsraInfo.internalFloatCount = 0;
+ }
+ else
+ {
+ // If the index is not a constant, we will use the SIMD temp location to store the vector.
+ // Otherwise, if the baseType is floating point, the targetReg will be a xmm reg and we
+ // can use that in the process of extracting the element.
+ //
+ // If the index is a constant and base type is a small int we can use pextrw, but on AVX
+ // we will need a temp if we are indexing into the upper half of the AVX register.
+ // In all other cases with constant index, we need a temp xmm register to extract the
+ // element if index is other than zero.
+
+ if (!op2->IsCnsIntOrI())
+ {
+ (void)comp->getSIMDInitTempVarNum();
+ }
+ else if (!varTypeIsFloating(simdTree->gtSIMDBaseType))
+ {
+ bool needFloatTemp;
+ if (varTypeIsSmallInt(simdTree->gtSIMDBaseType) &&
+ (comp->getSIMDInstructionSet() == InstructionSet_AVX))
+ {
+ int byteShiftCnt = (int)op2->AsIntCon()->gtIconVal * genTypeSize(simdTree->gtSIMDBaseType);
+ needFloatTemp = (byteShiftCnt >= 16);
+ }
+ else
+ {
+ needFloatTemp = !op2->IsIntegralConst(0);
+ }
+
+ if (needFloatTemp)
+ {
+ info->internalFloatCount = 1;
+ info->setInternalCandidates(lsra, lsra->allSIMDRegs());
+ }
+ }
+ }
+ }
+ break;
+
+ case SIMDIntrinsicSetX:
+ case SIMDIntrinsicSetY:
+ case SIMDIntrinsicSetZ:
+ case SIMDIntrinsicSetW:
+ info->srcCount = 2;
+
+ // We need an internal integer register for SSE2 codegen
+ if (comp->getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ info->internalIntCount = 1;
+ info->setInternalCandidates(lsra, lsra->allRegs(TYP_INT));
+ }
+
+ break;
+
+ case SIMDIntrinsicCast:
+ info->srcCount = 1;
+ break;
+
+ case SIMDIntrinsicShuffleSSE2:
+ info->srcCount = 2;
+ // Second operand is an integer constant and marked as contained.
+ op2 = tree->gtOp.gtOp2;
+ noway_assert(op2->IsCnsIntOrI());
+ MakeSrcContained(tree, op2);
+ break;
+
+ case SIMDIntrinsicGetX:
+ case SIMDIntrinsicGetY:
+ case SIMDIntrinsicGetZ:
+ case SIMDIntrinsicGetW:
+ case SIMDIntrinsicGetOne:
+ case SIMDIntrinsicGetZero:
+ case SIMDIntrinsicGetCount:
+ case SIMDIntrinsicGetAllOnes:
+ assert(!"Get intrinsics should not be seen during Lowering.");
+ unreached();
+
+ default:
+ noway_assert(!"Unimplemented SIMD node type.");
+ unreached();
+ }
+}
+#endif // FEATURE_SIMD
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCast: Set the NodeInfo for a GT_CAST.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCast(GenTree* tree)
+{
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ // TODO-XArch-CQ: Int-To-Int conversions - castOp cannot be a memory op and must have an assigned register.
+ // see CodeGen::genIntToIntCast()
+
+ info->srcCount = 1;
+ info->dstCount = 1;
+
+ // Non-overflow casts to/from float/double are done using SSE2 instructions
+ // and that allow the source operand to be either a reg or memop. Given the
+ // fact that casts from small int to float/double are done as two-level casts,
+ // the source operand is always guaranteed to be of size 4 or 8 bytes.
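+ // For example (illustrative): cvtsi2sd xmm0, dword ptr [mem] or cvttsd2si eax, qword ptr [mem].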
+ var_types castToType = tree->CastToType();
+ GenTreePtr castOp = tree->gtCast.CastOp();
+ var_types castOpType = castOp->TypeGet();
+ if (tree->gtFlags & GTF_UNSIGNED)
+ {
+ castOpType = genUnsignedType(castOpType);
+ }
+
+ if (!tree->gtOverflow() && (varTypeIsFloating(castToType) || varTypeIsFloating(castOpType)))
+ {
+#ifdef DEBUG
+ // If converting to float/double, the operand must be 4 or 8 byte in size.
+ if (varTypeIsFloating(castToType))
+ {
+ unsigned opSize = genTypeSize(castOpType);
+ assert(opSize == 4 || opSize == 8);
+ }
+#endif // DEBUG
+
+ // U8 -> R8 conversion requires that the operand be in a register.
+ if (castOpType != TYP_ULONG)
+ {
+ if (castOp->isMemoryOp() || castOp->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, castOp);
+ }
+ else
+ {
+ // Mark castOp as reg optional to indicate codegen
+ // can still generate code if it is on stack.
+ SetRegOptional(castOp);
+ }
+ }
+ }
+
+#if !defined(_TARGET_64BIT_)
+ if (varTypeIsLong(castOpType))
+ {
+ noway_assert(castOp->OperGet() == GT_LONG);
+ info->srcCount = 2;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ // some overflow checks need a temp reg:
+ // - GT_CAST from INT64/UINT64 to UINT32
+ if (tree->gtOverflow() && (castToType == TYP_UINT))
+ {
+ if (genTypeSize(castOpType) == 8)
+ {
+ // Here we don't need internal register to be different from targetReg,
+ // rather require it to be different from operand's reg.
+ info->internalIntCount = 1;
+ }
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitGCWriteBarrier: Set the NodeInfo for a GT_STOREIND requiring a write barrier.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitGCWriteBarrier(GenTree* tree)
+{
+ assert(tree->OperGet() == GT_STOREIND);
+
+ GenTreeStoreInd* dst = tree->AsStoreInd();
+ GenTreePtr addr = dst->Addr();
+ GenTreePtr src = dst->Data();
+
+ if (addr->OperGet() == GT_LEA)
+ {
+ // In the case where we are doing a helper assignment, if the dst
+ // is an indir through an lea, we need to actually instantiate the
+ // lea in a register
+ GenTreeAddrMode* lea = addr->AsAddrMode();
+
+ int leaSrcCount = 0;
+ if (lea->HasBase())
+ {
+ leaSrcCount++;
+ }
+ if (lea->HasIndex())
+ {
+ leaSrcCount++;
+ }
+ lea->gtLsraInfo.srcCount = leaSrcCount;
+ lea->gtLsraInfo.dstCount = 1;
+ }
+
+ bool useOptimizedWriteBarrierHelper = false; // By default, assume no optimized write barriers.
+
+#if NOGC_WRITE_BARRIERS
+
+#if defined(_TARGET_X86_)
+
+ useOptimizedWriteBarrierHelper = true; // On x86, use the optimized write barriers by default.
+#ifdef DEBUG
+ GCInfo::WriteBarrierForm wbf = comp->codeGen->gcInfo.gcIsWriteBarrierCandidate(tree, src);
+ if (wbf == GCInfo::WBF_NoBarrier_CheckNotHeapInDebug) // This one is always a call to a C++ method.
+ {
+ useOptimizedWriteBarrierHelper = false;
+ }
+#endif
+
+ if (useOptimizedWriteBarrierHelper)
+ {
+ // Special write barrier:
+ // op1 (addr) goes into REG_WRITE_BARRIER (rdx) and
+ // op2 (src) goes into any int register.
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_WRITE_BARRIER_SRC);
+ }
+
+#else // !defined(_TARGET_X86_)
+#error "NOGC_WRITE_BARRIERS is not supported"
+#endif // !defined(_TARGET_X86_)
+
+#endif // NOGC_WRITE_BARRIERS
+
+ if (!useOptimizedWriteBarrierHelper)
+ {
+ // For the standard JIT Helper calls:
+ // op1 (addr) goes into REG_ARG_0 and
+ // op2 (src) goes into REG_ARG_1
+ addr->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_0);
+ src->gtLsraInfo.setSrcCandidates(m_lsra, RBM_ARG_1);
+ }
+
+ // Both src and dst must reside in a register, which they should since we haven't set
+ // either of them as contained.
+ assert(addr->gtLsraInfo.dstCount == 1);
+ assert(src->gtLsraInfo.dstCount == 1);
+}
+
+//-----------------------------------------------------------------------------------------
+// TreeNodeInfoInitIndir: Specify register requirements for address expression of an indirection operation.
+//
+// Arguments:
+// indirTree - GT_IND or GT_STOREIND gentree node
+//
+void Lowering::TreeNodeInfoInitIndir(GenTreePtr indirTree)
+{
+ assert(indirTree->isIndir());
+ // If this is the rhs of a block copy (i.e. non-enregisterable struct),
+ // it has no register requirements.
+ if (indirTree->TypeGet() == TYP_STRUCT)
+ {
+ return;
+ }
+
+ GenTreePtr addr = indirTree->gtGetOp1();
+ TreeNodeInfo* info = &(indirTree->gtLsraInfo);
+
+ GenTreePtr base = nullptr;
+ GenTreePtr index = nullptr;
+ unsigned mul, cns;
+ bool rev;
+
+#ifdef FEATURE_SIMD
+ // If indirTree is of TYP_SIMD12, don't mark addr as contained
+ // so that it always gets computed into a register. This would
+ // mean codegen side logic doesn't need to handle all possible
+ // addr expressions that could be contained.
+ //
+ // TODO-XArch-CQ: handle other addr mode expressions that could be marked
+ // as contained.
+ if (indirTree->TypeGet() == TYP_SIMD12)
+ {
+ // Vector3 is read/written as two reads/writes: 8 byte and 4 byte.
+ // To assemble the vector properly we would need an additional
+ // XMM register.
+ info->internalFloatCount = 1;
+
+ // In case of GT_IND we need an internal register different from targetReg and
+ // both of the registers are used at the same time.
+ if (indirTree->OperGet() == GT_IND)
+ {
+ info->isInternalRegDelayFree = true;
+ }
+
+ info->setInternalCandidates(m_lsra, m_lsra->allSIMDRegs());
+
+ return;
+ }
+#endif // FEATURE_SIMD
+
+ if ((indirTree->gtFlags & GTF_IND_REQ_ADDR_IN_REG) != 0)
+ {
+ // The address of an indirection that requires its address in a reg.
+ // Skip any further processing that might otherwise make it contained.
+ }
+ else if ((addr->OperGet() == GT_CLS_VAR_ADDR) || (addr->OperGet() == GT_LCL_VAR_ADDR))
+ {
+ // These nodes go into an addr mode:
+ // - GT_CLS_VAR_ADDR turns into a constant.
+ // - GT_LCL_VAR_ADDR is a stack addr mode.
+
+ // make this contained, it turns into a constant that goes into an addr mode
+ MakeSrcContained(indirTree, addr);
+ }
+ else if (addr->IsCnsIntOrI() && addr->AsIntConCommon()->FitsInAddrBase(comp))
+ {
+ // Amd64:
+ // We can mark any pc-relative 32-bit addr as containable, except for a direct VSD call address.
+ // (i.e. those VSD calls for which stub addr is known during JIT compilation time). In this case,
+ // VM requires us to pass stub addr in REG_VIRTUAL_STUB_PARAM - see LowerVirtualStubCall(). For
+ // that reason we cannot mark such an addr as contained. Note that this is not an issue for
+ // indirect VSD calls since morphArgs() is explicitly materializing hidden param as a non-standard
+ // argument.
+ //
+ // Workaround:
+ // Note that LowerVirtualStubCall() sets addr->gtRegNum to REG_VIRTUAL_STUB_PARAM and Lowering::doPhase()
+ // sets destination candidates on such nodes and resets addr->gtRegNum to REG_NA before calling
+ // TreeNodeInfoInit(). Ideally we should set a flag on addr nodes that shouldn't be marked as contained
+ // (in LowerVirtualStubCall()), but we don't have any GTF_* flags left for that purpose. As a workaround
+ // an explicit check is made here.
+ //
+ // On x86, direct VSD is done via a relative branch, and in fact it MUST be contained.
+ MakeSrcContained(indirTree, addr);
+ }
+ else if ((addr->OperGet() == GT_LEA) && IsSafeToContainMem(indirTree, addr))
+ {
+ MakeSrcContained(indirTree, addr);
+ }
+ else if (addr->gtOper == GT_ARR_ELEM)
+ {
+ // The GT_ARR_ELEM consumes all the indices and produces the offset.
+ // The array object lives until the mem access.
+ // We also consume the target register into which the address is
+ // computed.
+
+ info->srcCount++;
+ assert(addr->gtLsraInfo.srcCount >= 2);
+ addr->gtLsraInfo.srcCount -= 1;
+ }
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitCmp: Set the register requirements for a compare.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitCmp(GenTreePtr tree)
+{
+ assert(tree->OperIsCompare());
+
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+#ifdef _TARGET_X86_
+ // If the compare is used by a jump, we just need to set the condition codes. If not, then we need
+ // to store the result into the low byte of a register, which requires the dst be a byteable register.
+ // We always set the dst candidates, though, because if this compare is consumed by a jump, they
+ // won't be used. We might be able to use GTF_RELOP_JMP_USED to determine this case, but it's not clear
+ // that flag is maintained until this location (especially for decomposed long compares).
+ info->setDstCandidates(m_lsra, RBM_BYTE_REGS);
+#endif // _TARGET_X86_
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+ var_types op1Type = op1->TypeGet();
+ var_types op2Type = op2->TypeGet();
+
+#if !defined(_TARGET_64BIT_)
+ // Long compares will consume GT_LONG nodes, each of which produces two results.
+ // Thus for each long operand there will be an additional source.
+ // TODO-X86-CQ: Mark hiOp2 and loOp2 as contained if it is a constant or a memory op.
+ if (varTypeIsLong(op1Type))
+ {
+ info->srcCount++;
+ }
+ if (varTypeIsLong(op2Type))
+ {
+ info->srcCount++;
+ }
+#endif // !defined(_TARGET_64BIT_)
+
+ // If either of op1 or op2 is floating point values, then we need to use
+ // ucomiss or ucomisd to compare, both of which support the following form:
+ // ucomis[s|d] xmm, xmm/mem
+ // That is, only the second operand can be a memory op.
+ //
+ // Second operand is a memory Op: Note that depending on comparison operator,
+ // the operands of ucomis[s|d] need to be reversed. Therefore, either op1 or
+ // op2 can be a memory op depending on the comparison operator.
+ if (varTypeIsFloating(op1Type))
+ {
+ // The type of the operands has to be the same and no implicit conversions at this stage.
+ assert(op1Type == op2Type);
+
+ bool reverseOps;
+ if ((tree->gtFlags & GTF_RELOP_NAN_UN) != 0)
+ {
+ // Unordered comparison case
+ reverseOps = tree->OperIs(GT_GT, GT_GE);
+ }
+ else
+ {
+ reverseOps = tree->OperIs(GT_LT, GT_LE);
+ }
+
+ GenTreePtr otherOp;
+ if (reverseOps)
+ {
+ otherOp = op1;
+ }
+ else
+ {
+ otherOp = op2;
+ }
+
+ assert(otherOp != nullptr);
+ if (otherOp->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, otherOp);
+ }
+ else if (otherOp->isMemoryOp() && ((otherOp == op2) || IsSafeToContainMem(tree, otherOp)))
+ {
+ MakeSrcContained(tree, otherOp);
+ }
+ else
+ {
+ // SSE2 allows only otherOp to be a memory-op. Since otherOp is not
+ // contained, we can mark it reg-optional.
+ SetRegOptional(otherOp);
+ }
+
+ return;
+ }
+
+ // TODO-XArch-CQ: factor out cmp optimization in 'genCondSetFlags' to be used here
+ // or in other backend.
+
+ if (CheckImmedAndMakeContained(tree, op2))
+ {
+ // If the types are the same, or if the constant is of the correct size,
+ // we can mark a memory op1 as contained.
+ if (op1Type == op2Type)
+ {
+ if (op1->isMemoryOp())
+ {
+ MakeSrcContained(tree, op1);
+ }
+ // If op1 codegen sets ZF and SF flags and ==/!= against
+ // zero, we don't need to generate test instruction,
+ // provided we don't have another GenTree node between op1
+ // and tree that could potentially modify flags.
+ //
+ // TODO-CQ: right now the below peep is inexpensive and
+ // gets the benefit in most of cases because in majority
+ // of cases op1, op2 and tree would be in that order in
+ // execution. In general we should be able to check that all
+ // the nodes that come after op1 in execution order do not
+ // modify the flags so that it is safe to avoid generating a
+ // test instruction. Such a check requires that on each
+ // GenTree node we need to set the info whether its codegen
+ // will modify flags.
+ //
+ // TODO-CQ: We can optimize compare against zero in the
+ // following cases by generating the branch as indicated
+ // against each case.
+ // 1) unsigned compare
+ // < 0 - always FALSE
+ // <= 0 - ZF=1 and jne
+ // > 0 - ZF=0 and je
+ // >= 0 - always TRUE
+ //
+ // 2) signed compare
+ // < 0 - SF=1 and js
+ // >= 0 - SF=0 and jns
+ else if (tree->OperIs(GT_EQ, GT_NE) && op1->gtSetZSFlags() && op2->IsIntegralConst(0) &&
+ (op1->gtNext == op2) && (op2->gtNext == tree))
+ {
+ // Require codegen of op1 to set the flags.
+ assert(!op1->gtSetFlags());
+ op1->gtFlags |= GTF_SET_FLAGS;
+ }
+ else
+ {
+ SetRegOptional(op1);
+ }
+ }
+ }
+ else if (op1Type == op2Type)
+ {
+ // Note that TEST does not have a r,rm encoding like CMP has but we can still
+ // contain the second operand because the emitter maps both r,rm and rm,r to
+ // the same instruction code. This avoids the need to special case TEST here.
+ if (op2->isMemoryOp())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op1->isMemoryOp() && IsSafeToContainMem(tree, op1))
+ {
+ MakeSrcContained(tree, op1);
+ }
+ else if (op1->IsCnsIntOrI())
+ {
+ // TODO-CQ: We should be able to support swapping op1 and op2 to generate cmp reg, imm,
+ // but there is currently an assert in CodeGen::genCompareInt().
+ // https://github.com/dotnet/coreclr/issues/7270
+ SetRegOptional(op2);
+ }
+ else
+ {
+ // One of op1 or op2 could be marked as reg optional
+ // to indicate that codegen can still generate code
+ // if one of them is on stack.
+ SetRegOptional(PreferredRegOptionalOperand(tree));
+ }
+ }
+}
+
+//--------------------------------------------------------------------------------------------
+// TreeNodeInfoInitIfRMWMemOp: Checks to see if there is a RMW memory operation rooted at
+// GT_STOREIND node and if so will mark register requirements for nodes under storeInd so
+// that CodeGen will generate a single instruction of the form:
+//
+// binOp [addressing mode], reg
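+//       e.g. (illustrative) add dword ptr [rax+8], ecx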
+//
+// Parameters
+// storeInd - GT_STOREIND node
+//
+// Return value
+// True, if RMW memory op tree pattern is recognized and op counts are set.
+// False otherwise.
+//
+bool Lowering::TreeNodeInfoInitIfRMWMemOp(GenTreePtr storeInd)
+{
+ assert(storeInd->OperGet() == GT_STOREIND);
+
+ // SSE2 doesn't support RMW on float values
+ assert(!varTypeIsFloating(storeInd));
+
+ // Terminology:
+ // indirDst = memory write of an addr mode (i.e. storeind destination)
+ // indirSrc = value being written to memory (i.e. storeind source, which could be a binary/unary op)
+ // indirCandidate = memory read i.e. a gtInd of an addr mode
+ // indirOpSource = source operand used in binary/unary op (i.e. source operand of indirSrc node)
+
+ GenTreePtr indirCandidate = nullptr;
+ GenTreePtr indirOpSource = nullptr;
+
+ if (!IsRMWMemOpRootedAtStoreInd(storeInd, &indirCandidate, &indirOpSource))
+ {
+ JITDUMP("Lower of StoreInd didn't mark the node as self contained for reason: %d\n",
+ storeInd->AsStoreInd()->GetRMWStatus());
+ DISPTREERANGE(BlockRange(), storeInd);
+ return false;
+ }
+
+ GenTreePtr indirDst = storeInd->gtGetOp1();
+ GenTreePtr indirSrc = storeInd->gtGetOp2();
+ genTreeOps oper = indirSrc->OperGet();
+
+ // At this point we have successfully detected a RMW memory op of one of the following forms
+ // storeInd(indirDst, indirSrc(indirCandidate, indirOpSource)) OR
+ // storeInd(indirDst, indirSrc(indirOpSource, indirCandidate)) in case of commutative operations OR
+ // storeInd(indirDst, indirSrc(indirCandidate)) in case of unary operations
+ //
+ // Here indirSrc = one of the supported binary or unary operation for RMW of memory
+ // indirCandidate = a GT_IND node
+ // indirCandidateChild = operand of GT_IND indirCandidate
+ //
+ // The logic below essentially does the following
+ // Make indirOpSource contained.
+ // Make indirSrc contained.
+ // Make indirCandidate contained.
+ // Make indirCandidateChild contained.
+ // Make indirDst contained except when it is a GT_LCL_VAR or GT_CNS_INT that doesn't fit within addr
+ // base.
+ // Note that due to the way containment is supported, we accomplish some of the above by clearing operand counts
+ // and directly propagating them upward.
+ //
+
+ TreeNodeInfo* info = &(storeInd->gtLsraInfo);
+ info->dstCount = 0;
+
+ if (GenTree::OperIsBinary(oper))
+ {
+ // On Xarch RMW operations require that the source memory-op be in a register.
+ assert(!indirOpSource->isMemoryOp() || indirOpSource->gtLsraInfo.dstCount == 1);
+ JITDUMP("Lower succesfully detected an assignment of the form: *addrMode BinOp= source\n");
+ info->srcCount = indirOpSource->gtLsraInfo.dstCount;
+ }
+ else
+ {
+ assert(GenTree::OperIsUnary(oper));
+ JITDUMP("Lower succesfully detected an assignment of the form: *addrMode = UnaryOp(*addrMode)\n");
+ info->srcCount = 0;
+ }
+ DISPTREERANGE(BlockRange(), storeInd);
+
+ m_lsra->clearOperandCounts(indirSrc);
+ m_lsra->clearOperandCounts(indirCandidate);
+
+ GenTreePtr indirCandidateChild = indirCandidate->gtGetOp1();
+ if (indirCandidateChild->OperGet() == GT_LEA)
+ {
+ GenTreeAddrMode* addrMode = indirCandidateChild->AsAddrMode();
+
+ if (addrMode->HasBase())
+ {
+ assert(addrMode->Base()->OperIsLeaf());
+ m_lsra->clearOperandCounts(addrMode->Base());
+ info->srcCount++;
+ }
+
+ if (addrMode->HasIndex())
+ {
+ assert(addrMode->Index()->OperIsLeaf());
+ m_lsra->clearOperandCounts(addrMode->Index());
+ info->srcCount++;
+ }
+
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else
+ {
+ assert(indirCandidateChild->OperGet() == GT_LCL_VAR || indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR ||
+ indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR || indirCandidateChild->OperGet() == GT_CNS_INT);
+
+ // If it is a GT_LCL_VAR, it still needs the reg to hold the address.
+ // We would still need a reg for GT_CNS_INT if it doesn't fit within addressing mode base.
+ // For GT_CLS_VAR_ADDR, we don't need a reg to hold the address, because field address value is known at jit
+ // time. Similarly, GT_LCL_VAR_ADDR does not need a reg, since it is a frame-relative address.
+ if (indirCandidateChild->OperGet() == GT_LCL_VAR_ADDR || indirCandidateChild->OperGet() == GT_CLS_VAR_ADDR)
+ {
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else if (indirCandidateChild->IsCnsIntOrI() && indirCandidateChild->AsIntConCommon()->FitsInAddrBase(comp))
+ {
+ m_lsra->clearOperandCounts(indirDst);
+ }
+ else
+ {
+ // Need a reg and hence increment src count of storeind
+ info->srcCount += indirCandidateChild->gtLsraInfo.dstCount;
+ }
+ }
+ m_lsra->clearOperandCounts(indirCandidateChild);
+
+#ifdef _TARGET_X86_
+ if (varTypeIsByte(storeInd))
+ {
+ // If storeInd is of TYP_BYTE, set indirOpSources to byteable registers.
+ bool containedNode = indirOpSource->gtLsraInfo.dstCount == 0;
+ if (!containedNode)
+ {
+ regMaskTP regMask = indirOpSource->gtLsraInfo.getSrcCandidates(m_lsra);
+ assert(regMask != RBM_NONE);
+ indirOpSource->gtLsraInfo.setSrcCandidates(m_lsra, regMask & ~RBM_NON_BYTE_REGS);
+ }
+ }
+#endif
+
+ return true;
+}
+
+//------------------------------------------------------------------------
+// TreeNodeInfoInitMul: Set the NodeInfo for a multiply.
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// None.
+//
+void Lowering::TreeNodeInfoInitMul(GenTreePtr tree)
+{
+#if defined(_TARGET_X86_)
+ assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI || tree->OperGet() == GT_MUL_LONG);
+#else
+ assert(tree->OperGet() == GT_MUL || tree->OperGet() == GT_MULHI);
+#endif
+ TreeNodeInfo* info = &(tree->gtLsraInfo);
+
+ info->srcCount = 2;
+ info->dstCount = 1;
+
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtOp.gtOp2;
+
+ // Case of float/double mul.
+ if (varTypeIsFloating(tree->TypeGet()))
+ {
+ assert(tree->OperGet() == GT_MUL);
+
+ if (op2->isMemoryOp() || op2->IsCnsNonZeroFltOrDbl())
+ {
+ MakeSrcContained(tree, op2);
+ }
+ else if (op1->IsCnsNonZeroFltOrDbl() || (op1->isMemoryOp() && IsSafeToContainMem(tree, op1)))
+ {
+ // Since GT_MUL is commutative, we will try to re-order operands if it is safe to
+ // generate more efficient code sequence for the case of GT_MUL(op1=memOp, op2=non-memOp)
+ MakeSrcContained(tree, op1);
+ }
+ else
+ {
+ // If there are no containable operands, we can make an operand reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ return;
+ }
+
+ bool isUnsignedMultiply = ((tree->gtFlags & GTF_UNSIGNED) != 0);
+ bool requiresOverflowCheck = tree->gtOverflowEx();
+ bool useLeaEncoding = false;
+ GenTreePtr memOp = nullptr;
+
+ bool hasImpliedFirstOperand = false;
+ GenTreeIntConCommon* imm = nullptr;
+ GenTreePtr other = nullptr;
+
+ // There are three forms of x86 multiply:
+ // one-op form: RDX:RAX = RAX * r/m
+ // two-op form: reg *= r/m
+ // three-op form: reg = r/m * imm
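+ //
+ // Illustrative (assumed) encodings:
+ //     mul dword ptr [mem]              ; one-op:   EDX:EAX = EAX * [mem]
+ //     imul eax, dword ptr [mem]        ; two-op:   EAX = EAX * [mem]
+ //     imul eax, dword ptr [mem], 9     ; three-op: EAX = [mem] * 9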
+
+ // This special widening 32x32->64 MUL is not used on x64
+ CLANG_FORMAT_COMMENT_ANCHOR;
+#if defined(_TARGET_X86_)
+ if (tree->OperGet() != GT_MUL_LONG)
+#endif
+ {
+ assert((tree->gtFlags & GTF_MUL_64RSLT) == 0);
+ }
+
+ // Multiply should never be using small types
+ assert(!varTypeIsSmall(tree->TypeGet()));
+
+ // We do use the widening multiply to implement
+ // the overflow checking for unsigned multiply
+ //
+ if (isUnsignedMultiply && requiresOverflowCheck)
+ {
+ // The only encoding provided is RDX:RAX = RAX * rm
+ //
+ // Here we set RAX as the only destination candidate
+ // In LSRA we set the kill set for this operation to RBM_RAX|RBM_RDX
+ //
+ info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
+ }
+ else if (tree->OperGet() == GT_MULHI)
+ {
+ // Have to use the encoding:RDX:RAX = RAX * rm. Since we only care about the
+ // upper 32 bits of the result set the destination candidate to REG_RDX.
+ info->setDstCandidates(m_lsra, RBM_RDX);
+ hasImpliedFirstOperand = true;
+ }
+#if defined(_TARGET_X86_)
+ else if (tree->OperGet() == GT_MUL_LONG)
+ {
+ // have to use the encoding:RDX:RAX = RAX * rm
+ info->setDstCandidates(m_lsra, RBM_RAX);
+ hasImpliedFirstOperand = true;
+ }
+#endif
+ else if (IsContainableImmed(tree, op2) || IsContainableImmed(tree, op1))
+ {
+ if (IsContainableImmed(tree, op2))
+ {
+ imm = op2->AsIntConCommon();
+ other = op1;
+ }
+ else
+ {
+ imm = op1->AsIntConCommon();
+ other = op2;
+ }
+
+ // CQ: We want to rewrite this into a LEA
+ ssize_t immVal = imm->AsIntConCommon()->IconValue();
+ if (!requiresOverflowCheck && (immVal == 3 || immVal == 5 || immVal == 9))
+ {
+ useLeaEncoding = true;
+ }
+
+ MakeSrcContained(tree, imm); // The imm is always contained
+ if (other->isMemoryOp())
+ {
+ memOp = other; // memOp may be contained below
+ }
+ }
+
+ // We allow one operand to be a contained memory operand.
+ // The memory op type must match with the 'tree' type.
+ // This is because during codegen we use 'tree' type to derive EmitTypeSize.
+ // E.g. op1 type = byte, op2 type = byte, but GT_MUL tree type is int.
+ //
+ if (memOp == nullptr && op2->isMemoryOp())
+ {
+ memOp = op2;
+ }
+
+ // To generate an LEA we need to force memOp into a register
+ // so don't allow memOp to be 'contained'
+ //
+ if (!useLeaEncoding)
+ {
+ if ((memOp != nullptr) && (memOp->TypeGet() == tree->TypeGet()) && IsSafeToContainMem(tree, memOp))
+ {
+ MakeSrcContained(tree, memOp);
+ }
+ else if (imm != nullptr)
+ {
+ // Has a contained immediate operand.
+ // Only 'other' operand can be marked as reg optional.
+ assert(other != nullptr);
+ SetRegOptional(other);
+ }
+ else if (hasImpliedFirstOperand)
+ {
+ // Only op2 can be marked as reg optional.
+ SetRegOptional(op2);
+ }
+ else
+ {
+ // If there are no containable operands, we can make either of op1 or op2
+ // as reg optional.
+ SetRegOptionalForBinOp(tree);
+ }
+ }
+}
+
+//------------------------------------------------------------------------------
+// SetContainsAVXFlags: Set the ContainsAVX flag when the type is floating point, and set the
+// Contains256bitAVX flag when the SIMD vector size is 32 bytes
+//
+// Arguments:
+// isFloatingPointType - true if it is floating point type
+// sizeOfSIMDVector - SIMD Vector size
+//
+void Lowering::SetContainsAVXFlags(bool isFloatingPointType /* = true */, unsigned sizeOfSIMDVector /* = 0*/)
+{
+#ifdef FEATURE_AVX_SUPPORT
+ if (isFloatingPointType)
+ {
+ if (comp->getFloatingPointInstructionSet() == InstructionSet_AVX)
+ {
+ comp->getEmitter()->SetContainsAVX(true);
+ }
+ if (sizeOfSIMDVector == 32 && comp->getSIMDInstructionSet() == InstructionSet_AVX)
+ {
+ comp->getEmitter()->SetContains256bitAVX(true);
+ }
+ }
+#endif
+}
+
+#ifdef _TARGET_X86_
+//------------------------------------------------------------------------
+// ExcludeNonByteableRegisters: Determines if we need to exclude non-byteable registers for
+// various reasons
+//
+// Arguments:
+// tree - The node of interest
+//
+// Return Value:
+// If we need to exclude non-byteable registers
+//
+bool Lowering::ExcludeNonByteableRegisters(GenTree* tree)
+{
+ // Example1: GT_STOREIND(byte, addr, op2) - storeind of a byte-sized value from op2 into mem 'addr'.
+ // The storeind itself will not produce any value and hence dstCount=0. But op2 could be a TYP_INT
+ // value. In this case we need to exclude esi/edi from the src candidates of op2.
+ if (varTypeIsByte(tree))
+ {
+ return true;
+ }
+ // Example2: GT_CAST(int <- bool <- int) - here type of GT_CAST node is int and castToType is bool.
+ else if ((tree->OperGet() == GT_CAST) && varTypeIsByte(tree->CastToType()))
+ {
+ return true;
+ }
+ else if (tree->OperIsCompare())
+ {
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
+
+ // Example3: GT_EQ(int, op1 of type ubyte, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size. Here is an example of generated code:
+ // cmp dl, byte ptr[addr mode]
+ // movzx edx, dl
+ if (varTypeIsByte(op1) && varTypeIsByte(op2))
+ {
+ return true;
+ }
+ // Example4: GT_EQ(int, op1 of type ubyte, op2 is GT_CNS_INT) - in this case codegen uses
+ // ubyte as the result of the comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size.
+ else if (varTypeIsByte(op1) && op2->IsCnsIntOrI())
+ {
+ return true;
+ }
+ // Example5: GT_EQ(int, op1 is GT_CNS_INT, op2 of type ubyte) - in this case codegen uses
+ // ubyte as the result of the comparison and if the result needs to be materialized into a reg
+ // simply zero extend it to TYP_INT size.
+ else if (op1->IsCnsIntOrI() && varTypeIsByte(op2))
+ {
+ return true;
+ }
+ else
+ {
+ return false;
+ }
+ }
+#ifdef FEATURE_SIMD
+ else if (tree->OperGet() == GT_SIMD)
+ {
+ GenTreeSIMD* simdNode = tree->AsSIMD();
+ switch (simdNode->gtSIMDIntrinsicID)
+ {
+ case SIMDIntrinsicOpEquality:
+ case SIMDIntrinsicOpInEquality:
+ // We manifest it into a byte register, so the target must be byteable.
+ return true;
+
+ case SIMDIntrinsicGetItem:
+ {
+ // This logic is duplicated from genSIMDIntrinsicGetItem().
+ // When we generate code for a SIMDIntrinsicGetItem, under certain circumstances we need to
+ // generate a movzx/movsx. On x86, these require byteable registers. So figure out which
+ // cases will require this, so the non-byteable registers can be excluded.
+
+ GenTree* op1 = simdNode->gtGetOp1();
+ GenTree* op2 = simdNode->gtGetOp2();
+ var_types baseType = simdNode->gtSIMDBaseType;
+ if (!op1->isMemoryOp() && op2->IsCnsIntOrI() && varTypeIsSmallInt(baseType))
+ {
+ bool ZeroOrSignExtnReqd = true;
+ unsigned baseSize = genTypeSize(baseType);
+ if (baseSize == 1)
+ {
+ if ((op2->gtIntCon.gtIconVal % 2) == 1)
+ {
+ ZeroOrSignExtnReqd = (baseType == TYP_BYTE);
+ }
+ }
+ else
+ {
+ assert(baseSize == 2);
+ ZeroOrSignExtnReqd = (baseType == TYP_SHORT);
+ }
+ return ZeroOrSignExtnReqd;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+ return false;
+ }
+#endif // FEATURE_SIMD
+ else
+ {
+ return false;
+ }
+}
+#endif // _TARGET_X86_
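The reason the exclusions above exist: on 32-bit x86 only EAX/ECX/EDX/EBX expose byte-sized sub-registers (AL/CL/DL/BL), so a node that must be read or written as a byte cannot live in ESI/EDI/EBP/ESP. A minimal sketch of how such a result would be applied, with hypothetical mask names rather than the JIT's RBM_* definitions:

    // Hypothetical register masks for illustration only.
    enum RegMaskSketch : unsigned
    {
        SK_EAX = 0x01, SK_ECX = 0x02, SK_EDX = 0x04, SK_EBX = 0x08,
        SK_ESP = 0x10, SK_EBP = 0x20, SK_ESI = 0x40, SK_EDI = 0x80,
    };

    // Only EAX/ECX/EDX/EBX have byte-addressable low registers on 32-bit x86.
    const unsigned SK_BYTEABLE = SK_EAX | SK_ECX | SK_EDX | SK_EBX;

    // Intersect a candidate set with the byteable registers when required.
    unsigned RestrictToByteable(unsigned candidates, bool excludeNonByteable)
    {
        return excludeNonByteable ? (candidates & SK_BYTEABLE) : candidates;
    }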
+
+#endif // _TARGET_XARCH_
+
+#endif // !LEGACY_BACKEND
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 678bb34c54..dabca57710 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -855,9 +855,12 @@ fgArgInfo::fgArgInfo(Compiler* comp, GenTreePtr call, unsigned numArgs)
compiler = comp;
callTree = call;
assert(call->IsCall());
- argCount = 0; // filled in arg count, starts at zero
- nextSlotNum = INIT_ARG_STACK_SLOT;
- stkLevel = 0;
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = 0;
+#if defined(UNIX_X86_ABI)
+ padStkAlign = 0;
+#endif
argTableSize = numArgs; // the allocated table size
hasRegArgs = false;
@@ -897,9 +900,12 @@ fgArgInfo::fgArgInfo(GenTreePtr newCall, GenTreePtr oldCall)
;
callTree = newCall;
assert(newCall->IsCall());
- argCount = 0; // filled in arg count, starts at zero
- nextSlotNum = INIT_ARG_STACK_SLOT;
- stkLevel = oldArgInfo->stkLevel;
+ argCount = 0; // filled in arg count, starts at zero
+ nextSlotNum = INIT_ARG_STACK_SLOT;
+ stkLevel = oldArgInfo->stkLevel;
+#if defined(UNIX_X86_ABI)
+ padStkAlign = oldArgInfo->padStkAlign;
+#endif
argTableSize = oldArgInfo->argTableSize;
argsComplete = false;
argTable = nullptr;
@@ -1079,16 +1085,19 @@ fgArgTabEntryPtr fgArgInfo::AddRegArg(
{
fgArgTabEntryPtr curArgTabEntry = new (compiler, CMK_fgArgInfo) fgArgTabEntry;
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = regNum;
- curArgTabEntry->slotNum = 0;
- curArgTabEntry->numRegs = numRegs;
- curArgTabEntry->numSlots = 0;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = regNum;
+ curArgTabEntry->slotNum = 0;
+ curArgTabEntry->numRegs = numRegs;
+ curArgTabEntry->numSlots = 0;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+#if defined(UNIX_X86_ABI)
+ curArgTabEntry->padStkAlign = 0;
+#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1154,16 +1163,19 @@ fgArgTabEntryPtr fgArgInfo::AddStkArg(unsigned argNum,
curArgTabEntry->isStruct = isStruct; // is this a struct arg
#endif // defined(FEATURE_UNIX_AMD64_STRUCT_PASSING)
- curArgTabEntry->argNum = argNum;
- curArgTabEntry->node = node;
- curArgTabEntry->parent = parent;
- curArgTabEntry->regNum = REG_STK;
- curArgTabEntry->slotNum = nextSlotNum;
- curArgTabEntry->numRegs = 0;
- curArgTabEntry->numSlots = numSlots;
- curArgTabEntry->alignment = alignment;
- curArgTabEntry->lateArgInx = (unsigned)-1;
- curArgTabEntry->tmpNum = (unsigned)-1;
+ curArgTabEntry->argNum = argNum;
+ curArgTabEntry->node = node;
+ curArgTabEntry->parent = parent;
+ curArgTabEntry->regNum = REG_STK;
+ curArgTabEntry->slotNum = nextSlotNum;
+ curArgTabEntry->numRegs = 0;
+ curArgTabEntry->numSlots = numSlots;
+ curArgTabEntry->alignment = alignment;
+ curArgTabEntry->lateArgInx = (unsigned)-1;
+ curArgTabEntry->tmpNum = (unsigned)-1;
+#if defined(UNIX_X86_ABI)
+ curArgTabEntry->padStkAlign = 0;
+#endif
curArgTabEntry->isSplit = false;
curArgTabEntry->isTmp = false;
curArgTabEntry->needTmp = false;
@@ -1689,6 +1701,52 @@ void fgArgInfo::ArgsComplete()
argsComplete = true;
}
+#if defined(UNIX_X86_ABI)
+// Compute the stack alignment padding for the Call holding this object
+//
+// NOTE: This function calculates the number of padding slots needed to align the
+// stack before pushing arguments. The padding value is stored in the padStkAlign
+// member of the first argument's fgArgTabEntry so that code (sub esp, n) can be
+// emitted before generating the push for that argument. As a result, the stack is
+// aligned right before making the "Call". After the Call, the stack is re-adjusted
+// using the fgArgInfo->padStkAlign value, since we can't use the one in fgArgTabEntry.
+//
+void fgArgInfo::ArgsAlignPadding()
+{
+ // To get the padding amount, sum up all the slots and get the remainder for padding
+ unsigned curInx;
+ unsigned numSlots = 0;
+ fgArgTabEntryPtr firstArgTabEntry = nullptr;
+
+ for (curInx = 0; curInx < argCount; curInx++)
+ {
+ fgArgTabEntryPtr curArgTabEntry = argTable[curInx];
+ if (curArgTabEntry->numSlots > 0)
+ {
+ // The argument may be REG_STK, or a constant or register argument that is passed on the stack
+ assert(nextSlotNum >= curArgTabEntry->slotNum);
+
+ numSlots += curArgTabEntry->numSlots;
+ if (firstArgTabEntry == nullptr)
+ {
+ // First argument will be used to hold the padding amount
+ firstArgTabEntry = curArgTabEntry;
+ }
+ }
+ }
+
+ if (firstArgTabEntry != nullptr)
+ {
+ const int numSlotsAligned = STACK_ALIGN / TARGET_POINTER_SIZE;
+ // Set stack align pad for the first argument
+ firstArgTabEntry->padStkAlign = AlignmentPad(numSlots, numSlotsAligned);
+ // Set also for fgArgInfo that will be used to reset stack pointer after the Call
+ this->padStkAlign = firstArgTabEntry->padStkAlign;
+ }
+}
+#endif // UNIX_X86_ABI
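A minimal sketch of the padding arithmetic assumed above, under the assumption that AlignmentPad rounds the slot count up to the next multiple of numSlotsAligned (STACK_ALIGN / TARGET_POINTER_SIZE, i.e. 16 / 4 = 4 slots on UNIX x86); the helper name here is illustrative, not the JIT's definition:

    // Illustrative version of the padding computation: extra pointer-sized slots
    // needed to round numSlots up to a multiple of numSlotsAligned.
    unsigned AlignmentPadSketch(unsigned numSlots, unsigned numSlotsAligned)
    {
        unsigned remainder = numSlots % numSlotsAligned;
        return (remainder == 0) ? 0 : (numSlotsAligned - remainder);
    }
    // e.g. 5 outgoing slots -> pad of 3 slots, so "sub esp, 12" keeps the call site 16-byte aligned.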
+
void fgArgInfo::SortArgs()
{
assert(argsComplete == true);
@@ -2431,6 +2489,22 @@ void fgArgInfo::EvalArgsToTemps()
#endif
}
+// Get the late arg for arg at position argIndex.
+// argIndex - 0-based position to get late arg for.
+// Caller must ensure this position has a late arg.
+GenTreePtr fgArgInfo::GetLateArg(unsigned argIndex)
+{
+ for (unsigned j = 0; j < this->ArgCount(); j++)
+ {
+ if (this->ArgTable()[j]->argNum == argIndex)
+ {
+ return this->ArgTable()[j]->node;
+ }
+ }
+ // Caller must ensure late arg exists.
+ unreached();
+}
+
void fgArgInfo::RecordStkLevel(unsigned stkLvl)
{
assert(!IsUninitialized(stkLvl));
@@ -4211,6 +4285,11 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* callNode)
if (!reMorphing)
{
call->fgArgInfo->ArgsComplete();
+
+#if defined(UNIX_X86_ABI)
+ call->fgArgInfo->ArgsAlignPadding();
+#endif // UNIX_X86_ABI
+
#ifdef LEGACY_BACKEND
call->gtCallRegUsedMask = genIntAllRegArgMask(intArgRegNum);
#if defined(_TARGET_ARM_)
@@ -5629,8 +5708,13 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
// to ensure that the same values are used in the bounds check and the actual
// dereference.
// Also we allocate the temporary when the arrRef is sufficiently complex/expensive.
+ // Note that if 'arrRef' is a GT_FIELD, it has not yet been morphed so its true
+ // complexity is not exposed. (Without that condition there are cases of local struct
+ // fields that were previously, needlessly, marked as GTF_GLOB_REF, and when that was
+ // fixed, there were some regressions that were mostly ameliorated by adding this condition.)
//
- if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY))
+ if ((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) ||
+ gtComplexityExceeds(&arrRef, MAX_ARR_COMPLEXITY) || (arrRef->OperGet() == GT_FIELD))
{
unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
arrRefDefn = gtNewTempAssign(arrRefTmpNum, arrRef);
@@ -5649,7 +5733,8 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
// dereference.
// Also we allocate the temporary when the index is sufficiently complex/expensive.
//
- if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY))
+ if ((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) || gtComplexityExceeds(&index, MAX_ARR_COMPLEXITY) ||
+ (arrRef->OperGet() == GT_FIELD))
{
unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
indexDefn = gtNewTempAssign(indexTmpNum, index);
@@ -5683,7 +5768,7 @@ GenTreePtr Compiler::fgMorphArrayIndex(GenTreePtr tree)
}
GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
- GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, SCK_RNGCHK_FAIL);
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, SCK_RNGCHK_FAIL);
bndsChk = arrBndsChk;
@@ -6051,14 +6136,15 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
{
assert(tree->gtOper == GT_FIELD);
- noway_assert(tree->gtFlags & GTF_GLOB_REF);
-
CORINFO_FIELD_HANDLE symHnd = tree->gtField.gtFldHnd;
unsigned fldOffset = tree->gtField.gtFldOffset;
GenTreePtr objRef = tree->gtField.gtFldObj;
bool fieldMayOverlap = false;
bool objIsLocal = false;
+ noway_assert(((objRef != nullptr) && (objRef->IsLocalAddrExpr() != nullptr)) ||
+ ((tree->gtFlags & GTF_GLOB_REF) != 0));
+
if (tree->gtField.gtFldMayOverlap)
{
fieldMayOverlap = true;
@@ -6067,8 +6153,8 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
}
#ifdef FEATURE_SIMD
- // if this field belongs to simd struct, tranlate it to simd instrinsic.
- if (mac == nullptr || mac->m_kind != MACK_Addr)
+ // If this field belongs to a simd struct, translate it to a simd intrinsic.
+ if (mac == nullptr)
{
GenTreePtr newTree = fgMorphFieldToSIMDIntrinsicGet(tree);
if (newTree != tree)
@@ -6077,13 +6163,6 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
return newTree;
}
}
- else if (objRef != nullptr && objRef->OperGet() == GT_ADDR && objRef->OperIsSIMD())
- {
- // We have a field of an SIMD intrinsic in an address-taken context.
- // We need to copy the SIMD result to a temp, and take the field of that.
- GenTree* copy = fgCopySIMDNode(objRef->gtOp.gtOp1->AsSIMD());
- objRef->gtOp.gtOp1 = copy;
- }
#endif
/* Is this an instance data member? */
@@ -6468,6 +6547,11 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
addr->gtIntCon.gtFieldSeq = fieldSeq;
tree->SetOper(GT_IND);
+ // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
+ // We must clear it when we transform the node.
+ // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
+ // that the logic above does its own checking to determine whether a nullcheck is needed.
+ tree->gtFlags &= ~GTF_IND_ARR_LEN;
tree->gtOp.gtOp1 = addr;
return fgMorphSmpOp(tree);
@@ -6507,6 +6591,11 @@ GenTreePtr Compiler::fgMorphField(GenTreePtr tree, MorphAddrContext* mac)
}
}
noway_assert(tree->gtOper == GT_IND);
+ // The GTF_FLD_NULLCHECK is the same bit as GTF_IND_ARR_LEN.
+ // We must clear it when we transform the node.
+ // TODO-Cleanup: It appears that the GTF_FLD_NULLCHECK flag is never checked, and note
+ // that the logic above does its own checking to determine whether a nullcheck is needed.
+ tree->gtFlags &= ~GTF_IND_ARR_LEN;
GenTreePtr res = fgMorphSmpOp(tree);
@@ -8467,7 +8556,7 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
// The SIMD type in question could be Vector2f which is 8-bytes in size.
// The below check is to make sure that we don't turn that copyblk
// into a assignment, since rationalizer logic will transform the
- // copyblk apropriately. Otherwise, the transormation made in this
+ // copyblk appropriately. Otherwise, the transformation made in this
// routine will prevent rationalizer logic and we might end up with
// GT_ADDR(GT_SIMD) node post rationalization, leading to a noway assert
// in codegen.
@@ -8495,6 +8584,12 @@ GenTreePtr Compiler::fgMorphOneAsgBlockOp(GenTreePtr tree)
}
else
{
+ // Is this an enregisterable struct that is already a simple assignment?
+ // This can happen if we are re-morphing.
+ if ((dest->OperGet() == GT_IND) && (dest->TypeGet() != TYP_STRUCT) && isCopyBlock)
+ {
+ return tree;
+ }
noway_assert(dest->OperIsLocal());
lclVarTree = dest;
destVarNum = lclVarTree->AsLclVarCommon()->gtLclNum;
@@ -9185,7 +9280,7 @@ GenTree* Compiler::fgMorphBlkNode(GenTreePtr tree, bool isDest)
// Therefore, we insert a GT_ADDR just above the node, and wrap it in an obj or ind.
// TODO-1stClassStructs: Consider whether this can be improved.
// Also consider whether some of this can be included in gtNewBlockVal (though note
- // that doing so may cause us to query the type system before we otherwise would.
+ // that doing so may cause us to query the type system before we otherwise would).
GenTree* lastComma = nullptr;
for (GenTree* next = tree; next != nullptr && next->gtOper == GT_COMMA; next = next->gtGetOp2())
{
@@ -10223,31 +10318,46 @@ _Done:
// FP architectures
GenTree* Compiler::fgMorphForRegisterFP(GenTree* tree)
{
- GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
-
- if (tree->OperIsArithmetic() && varTypeIsFloating(tree))
+ if (tree->OperIsArithmetic())
{
- if (op1->TypeGet() != tree->TypeGet())
- {
- tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp1, tree->TypeGet());
- }
- if (op2->TypeGet() != tree->TypeGet())
+ if (varTypeIsFloating(tree))
{
- tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), tree->gtOp.gtOp2, tree->TypeGet());
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+ GenTreePtr op2 = tree->gtGetOp2();
+
+ if (op1->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp1 = gtNewCastNode(tree->TypeGet(), op1, tree->TypeGet());
+ }
+ if (op2->TypeGet() != tree->TypeGet())
+ {
+ tree->gtOp.gtOp2 = gtNewCastNode(tree->TypeGet(), op2, tree->TypeGet());
+ }
}
}
- else if (tree->OperIsCompare() && varTypeIsFloating(op1) && op1->TypeGet() != op2->TypeGet())
+ else if (tree->OperIsCompare())
{
- // both had better be floating, just one bigger than other
- assert(varTypeIsFloating(op2));
- if (op1->TypeGet() == TYP_FLOAT)
- {
- tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp1, TYP_DOUBLE);
- }
- else if (op2->TypeGet() == TYP_FLOAT)
+ GenTreePtr op1 = tree->gtOp.gtOp1;
+
+ if (varTypeIsFloating(op1))
{
- tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, tree->gtOp.gtOp2, TYP_DOUBLE);
+ GenTreePtr op2 = tree->gtGetOp2();
+ assert(varTypeIsFloating(op2));
+
+ if (op1->TypeGet() != op2->TypeGet())
+ {
+ // both had better be floating, just one bigger than other
+ if (op1->TypeGet() == TYP_FLOAT)
+ {
+ assert(op2->TypeGet() == TYP_DOUBLE);
+ tree->gtOp.gtOp1 = gtNewCastNode(TYP_DOUBLE, op1, TYP_DOUBLE);
+ }
+ else if (op2->TypeGet() == TYP_FLOAT)
+ {
+ assert(op1->TypeGet() == TYP_DOUBLE);
+ tree->gtOp.gtOp2 = gtNewCastNode(TYP_DOUBLE, op2, TYP_DOUBLE);
+ }
+ }
}
}
@@ -10323,50 +10433,6 @@ GenTree* Compiler::fgMorphRecognizeBoxNullable(GenTree* compare)
#ifdef FEATURE_SIMD
-//--------------------------------------------------------------------------------------
-// fgCopySIMDNode: make a copy of a SIMD intrinsic node, e.g. so that a field can be accessed.
-//
-// Arguments:
-// simdNode - The GenTreeSIMD node to be copied
-//
-// Return Value:
-// A comma node where op1 is the assignment of the simd node to a temp, and op2 is the temp lclVar.
-//
-GenTree* Compiler::fgCopySIMDNode(GenTreeSIMD* simdNode)
-{
- // Copy the result of the SIMD intrinsic into a temp.
- unsigned lclNum = lvaGrabTemp(true DEBUGARG("Copy of SIMD intrinsic with field access"));
-
- CORINFO_CLASS_HANDLE simdHandle = NO_CLASS_HANDLE;
- // We only have fields of the fixed float vectors.
- noway_assert(simdNode->gtSIMDBaseType == TYP_FLOAT);
- switch (simdNode->gtSIMDSize)
- {
- case 8:
- simdHandle = SIMDVector2Handle;
- break;
- case 12:
- simdHandle = SIMDVector3Handle;
- break;
- case 16:
- simdHandle = SIMDVector4Handle;
- break;
- default:
- noway_assert(!"field of unexpected SIMD type");
- break;
- }
- assert(simdHandle != NO_CLASS_HANDLE);
-
- lvaSetStruct(lclNum, simdHandle, false, true);
- lvaTable[lclNum].lvFieldAccessed = true;
-
- GenTree* asg = gtNewTempAssign(lclNum, simdNode);
- GenTree* newLclVarNode = new (this, GT_LCL_VAR) GenTreeLclVar(simdNode->TypeGet(), lclNum, BAD_IL_OFFSET);
-
- GenTree* comma = gtNewOperNode(GT_COMMA, simdNode->TypeGet(), asg, newLclVarNode);
- return comma;
-}
-
//--------------------------------------------------------------------------------------------------------------
// getSIMDStructFromField:
// Checking whether the field belongs to a simd struct or not. If it is, return the GenTreePtr for
@@ -10449,12 +10515,12 @@ GenTreePtr Compiler::getSIMDStructFromField(GenTreePtr tree,
}
/*****************************************************************************
-* If a read operation tries to access simd struct field, then transform the this
-* operation to to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
+* If a read operation tries to access simd struct field, then transform the
+* operation to the SIMD intrinsic SIMDIntrinsicGetItem, and return the new tree.
* Otherwise, return the old tree.
* Argument:
* tree - GenTreePtr. If this pointer points to simd struct which is used for simd
-* intrinsic. We will morph it as simd intrinsic SIMDIntrinsicGetItem.
+* intrinsic, we will morph it as simd intrinsic SIMDIntrinsicGetItem.
* Return:
* A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
* return nullptr.
@@ -10468,7 +10534,6 @@ GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
GenTreePtr simdStructNode = getSIMDStructFromField(tree, &baseType, &index, &simdSize);
if (simdStructNode != nullptr)
{
-
assert(simdSize >= ((index + 1) * genTypeSize(baseType)));
GenTree* op2 = gtNewIconNode(index);
tree = gtNewSIMDNode(baseType, simdStructNode, op2, SIMDIntrinsicGetItem, baseType, simdSize);
@@ -10481,11 +10546,11 @@ GenTreePtr Compiler::fgMorphFieldToSIMDIntrinsicGet(GenTreePtr tree)
/*****************************************************************************
* Transform an assignment of a SIMD struct field to SIMD intrinsic
-* SIMDIntrinsicGetItem, and return a new tree. If If it is not such an assignment,
+* SIMDIntrinsicSet*, and return a new tree. If it is not such an assignment,
* then return the old tree.
* Argument:
* tree - GenTreePtr. If this pointer points to simd struct which is used for simd
-* intrinsic. We will morph it as simd intrinsic set.
+* intrinsic, we will morph it as simd intrinsic set.
* Return:
* A GenTreePtr which points to the new tree. If the tree is not for simd intrinsic,
* return nullptr.
@@ -10538,7 +10603,8 @@ GenTreePtr Compiler::fgMorphFieldAssignToSIMDIntrinsicSet(GenTreePtr tree)
return tree;
}
-#endif
+#endif // FEATURE_SIMD
+
/*****************************************************************************
*
* Transform the given GTK_SMPOP tree for code generation.
@@ -10584,7 +10650,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
genTreeOps oper = tree->OperGet();
var_types typ = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
/*-------------------------------------------------------------------------
* First do any PRE-ORDER processing
@@ -10998,6 +11064,9 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
//
// a % b = a - (a / b) * b;
//
+ // NOTE: we should never need to perform this transformation when remorphing, since global morphing
+ // should already have done so and we do not introduce new modulus nodes in later phases.
+ assert(!optValnumCSE_phase);
tree = fgMorphModToSubMulDiv(tree->AsOp());
op1 = tree->gtOp.gtOp1;
op2 = tree->gtOp.gtOp2;
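A scalar illustration of the identity the morpher applies here (a sketch only; the actual transformation builds GT_SUB/GT_MUL/GT_DIV trees and handles multi-use operands via temps):

    // a % b rewritten as a - (a / b) * b, assuming b != 0 and ignoring the INT_MIN / -1 corner case.
    int ModViaSubMulDiv(int a, int b)
    {
        int q = a / b;    // the division the target supports directly
        return a - q * b; // the remainder recovered from the quotient
    }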
@@ -11010,7 +11079,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// the redundant division. If there's no redundant division then
// nothing is lost, lowering would have done this transform anyway.
- if ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst())
+ if (!optValnumCSE_phase && ((tree->OperGet() == GT_MOD) && op2->IsIntegralConst()))
{
ssize_t divisorValue = op2->AsIntCon()->IconValue();
size_t absDivisorValue = (divisorValue == SSIZE_T_MIN) ? static_cast<size_t>(divisorValue)
@@ -11206,7 +11275,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
fgMorphRecognizeBoxNullable(tree);
op1 = tree->gtOp.gtOp1;
- op2 = tree->gtGetOp2();
+ op2 = tree->gtGetOp2IfPresent();
break;
@@ -11297,12 +11366,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
// comma list. The left arg (op1) gets a fresh context.
subMac1 = nullptr;
break;
- case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- subMac1 = &subIndMac1;
- }
- break;
case GT_OBJ:
case GT_BLK:
case GT_DYN_BLK:
@@ -11440,12 +11503,6 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
}
}
break;
- case GT_ASG:
- if (tree->OperIsBlkOp())
- {
- mac = &subIndMac2;
- }
- break;
default:
break;
}
@@ -11611,7 +11668,7 @@ GenTreePtr Compiler::fgMorphSmpOp(GenTreePtr tree, MorphAddrContext* mac)
/* gtFoldExpr could have changed op1 and op2 */
op1 = tree->gtOp.gtOp1;
- op2 = tree->gtGetOp2();
+ op2 = tree->gtGetOp2IfPresent();
// Do we have an integer compare operation?
//
@@ -13508,12 +13565,10 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
/* and also "a = x <op> a" into "a <op>= x" for communative ops */
CLANG_FORMAT_COMMENT_ANCHOR;
-#if !LONG_ASG_OPS
if (typ == TYP_LONG)
{
break;
}
-#endif
if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
{
@@ -13669,25 +13724,9 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree)
case GT_LSH:
case GT_RSH:
case GT_RSZ:
-
-#if LONG_ASG_OPS
-
- if (typ == TYP_LONG)
- break;
-#endif
-
case GT_OR:
case GT_XOR:
case GT_AND:
-
-#if LONG_ASG_OPS
-
- /* TODO: allow non-const long assignment operators */
-
- if (typ == TYP_LONG && op2->gtOp.gtOp2->gtOper != GT_CNS_LNG)
- break;
-#endif
-
ASG_OP:
{
bool bReverse = false;
@@ -14048,11 +14087,21 @@ GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
{
numerator = fgMakeMultiUse(&tree->gtOp1);
}
+ else if (lvaLocalVarRefCounted && numerator->OperIsLocal())
+ {
+ // Morphing introduces new lclVar references. Increase ref counts
+ lvaIncRefCnts(numerator);
+ }
if (!denominator->OperIsLeaf())
{
denominator = fgMakeMultiUse(&tree->gtOp2);
}
+ else if (lvaLocalVarRefCounted && denominator->OperIsLocal())
+ {
+ // Morphing introduces new lclVar references. Increase ref counts
+ lvaIncRefCnts(denominator);
+ }
// The numerator and denominator may have been assigned to temps, in which case
// their defining assignments are in the current tree. Therefore, we need to
@@ -14335,7 +14384,7 @@ GenTreePtr Compiler::fgMorphToEmulatedFP(GenTreePtr tree)
genTreeOps oper = tree->OperGet();
var_types typ = tree->TypeGet();
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
/*
We have to use helper calls for all FP operations:
@@ -14683,8 +14732,8 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
fgSetRngChkTarget(tree);
GenTreeBoundsChk* bndsChk = tree->AsBoundsChk();
- bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
bndsChk->gtIndex = fgMorphTree(bndsChk->gtIndex);
+ bndsChk->gtArrLen = fgMorphTree(bndsChk->gtArrLen);
// If the index is a comma(throw, x), just return that.
if (!optValnumCSE_phase && fgIsCommaThrow(bndsChk->gtIndex))
{
@@ -14692,8 +14741,8 @@ GenTreePtr Compiler::fgMorphTree(GenTreePtr tree, MorphAddrContext* mac)
}
// Propagate effects flags upwards
- bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
bndsChk->gtFlags |= (bndsChk->gtIndex->gtFlags & GTF_ALL_EFFECT);
+ bndsChk->gtFlags |= (bndsChk->gtArrLen->gtFlags & GTF_ALL_EFFECT);
// Otherwise, we don't change the tree.
}
@@ -15972,7 +16021,6 @@ void Compiler::fgMorphBlocks()
// genReturnLocal
noway_assert(ret->OperGet() == GT_RETURN);
noway_assert(ret->gtGetOp1() != nullptr);
- noway_assert(ret->gtGetOp2() == nullptr);
GenTreePtr tree = gtNewTempAssign(genReturnLocal, ret->gtGetOp1());
@@ -15991,7 +16039,6 @@ void Compiler::fgMorphBlocks()
// Must be a void GT_RETURN with null operand; delete it as this block branches to oneReturn block
noway_assert(ret->TypeGet() == TYP_VOID);
noway_assert(ret->gtGetOp1() == nullptr);
- noway_assert(ret->gtGetOp2() == nullptr);
fgRemoveStmt(block, last);
}
@@ -16897,6 +16944,20 @@ void Compiler::fgMorph()
fgDebugCheckBBlist(false, false);
#endif // DEBUG
+ fgRemoveEmptyTry();
+
+ EndPhase(PHASE_EMPTY_TRY);
+
+ fgRemoveEmptyFinally();
+
+ EndPhase(PHASE_EMPTY_FINALLY);
+
+ fgCloneFinally();
+
+ EndPhase(PHASE_CLONE_FINALLY);
+
+ fgUpdateFinallyTargetFlags();
+
/* For x64 and ARM64 we need to mark irregular parameters early so that they don't get promoted */
fgMarkImplicitByRefArgs();
@@ -17002,6 +17063,14 @@ void Compiler::fgPromoteStructs()
return;
}
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaTable before fgPromoteStructs\n");
+ lvaTableDump();
+ }
+#endif // DEBUG
+
// The lvaTable might grow as we grab temps. Make a local copy here.
unsigned startLvaCount = lvaCount;
@@ -17019,17 +17088,13 @@ void Compiler::fgPromoteStructs()
bool promotedVar = false;
LclVarDsc* varDsc = &lvaTable[lclNum];
-#ifdef FEATURE_SIMD
- if (varDsc->lvSIMDType && varDsc->lvUsedInSIMDIntrinsic)
+ if (varDsc->lvIsSIMDType() && varDsc->lvIsUsedInSIMDIntrinsic())
{
// If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
// its fields. Instead, we will attempt to enregister the entire struct.
varDsc->lvRegStruct = true;
}
- else
-#endif // FEATURE_SIMD
- // Don't promote if we have reached the tracking limit.
- if (lvaHaveManyLocals())
+ else if (lvaHaveManyLocals()) // Don't promote if we have reached the tracking limit.
{
// Print the message first time when we detected this condition
if (!tooManyLocals)
@@ -17060,7 +17125,6 @@ void Compiler::fgPromoteStructs()
if (canPromote)
{
-
// We *can* promote; *should* we promote?
// We should only do so if promotion has potential savings. One source of savings
// is if a field of the struct is accessed, since this access will be turned into
@@ -17068,9 +17132,17 @@ void Compiler::fgPromoteStructs()
// field accesses, but only block-level operations on the whole struct, if the struct
// has only one or two fields, then doing those block operations field-wise is probably faster
// than doing a whole-variable block operation (e.g., a hardware "copy loop" on x86).
- // So if no fields are accessed independently, and there are three or more fields,
+ // Struct promotion also provides the following benefits: reduced stack frame size,
+ // reduced need for zero-initialization of the stack frame, and finer-grained constant/copy propagation.
+ // Asm diffs indicate that promoting structs up to 3 fields is a net size win.
+ // So if no fields are accessed independently, and there are four or more fields,
// then do not promote.
- if (structPromotionInfo.fieldCnt > 2 && !varDsc->lvFieldAccessed)
+ //
+ // TODO: Ideally we would want to consider the impact of whether the struct is
+ // passed as a parameter or assigned the return value of a call, because once promoted,
+ // struct copying is done by field-by-field assignment instead of a more efficient
+ // rep.stos or xmm-reg-based copy.
+ if (structPromotionInfo.fieldCnt > 3 && !varDsc->lvFieldAccessed)
{
JITDUMP("Not promoting promotable struct local V%02u: #fields = %d, fieldAccessed = %d.\n", lclNum,
structPromotionInfo.fieldCnt, varDsc->lvFieldAccessed);
@@ -17185,114 +17257,115 @@ void Compiler::fgPromoteStructs()
}
#endif // FEATURE_SIMD
}
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf("\nlvaTable after fgPromoteStructs\n");
+ lvaTableDump();
+ }
+#endif // DEBUG
}
Compiler::fgWalkResult Compiler::fgMorphStructField(GenTreePtr tree, fgWalkData* fgWalkPre)
{
noway_assert(tree->OperGet() == GT_FIELD);
- noway_assert(tree->gtFlags & GTF_GLOB_REF);
GenTreePtr objRef = tree->gtField.gtFldObj;
+ GenTreePtr obj = ((objRef != nullptr) && (objRef->gtOper == GT_ADDR)) ? objRef->gtOp.gtOp1 : nullptr;
+ noway_assert((tree->gtFlags & GTF_GLOB_REF) || ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR)));
/* Is this an instance data member? */
- if (objRef)
+ if ((obj != nullptr) && (obj->gtOper == GT_LCL_VAR))
{
- if (objRef->gtOper == GT_ADDR)
- {
- GenTreePtr obj = objRef->gtOp.gtOp1;
+ unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
+ LclVarDsc* varDsc = &lvaTable[lclNum];
- if (obj->gtOper == GT_LCL_VAR)
+ if (varTypeIsStruct(obj))
+ {
+ if (varDsc->lvPromoted)
{
- unsigned lclNum = obj->gtLclVarCommon.gtLclNum;
- LclVarDsc* varDsc = &lvaTable[lclNum];
+ // Promoted struct
+ unsigned fldOffset = tree->gtField.gtFldOffset;
+ unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
+ noway_assert(fieldLclIndex != BAD_VAR_NUM);
+
+ tree->SetOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
+ tree->gtType = lvaTable[fieldLclIndex].TypeGet();
+ tree->gtFlags &= GTF_NODE_MASK;
+ tree->gtFlags &= ~GTF_GLOB_REF;
- if (varTypeIsStruct(obj))
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
{
- if (varDsc->lvPromoted)
- {
- // Promoted struct
- unsigned fldOffset = tree->gtField.gtFldOffset;
- unsigned fieldLclIndex = lvaGetFieldLocal(varDsc, fldOffset);
- noway_assert(fieldLclIndex != BAD_VAR_NUM);
-
- tree->SetOper(GT_LCL_VAR);
- tree->gtLclVarCommon.SetLclNum(fieldLclIndex);
- tree->gtType = lvaTable[fieldLclIndex].TypeGet();
- tree->gtFlags &= GTF_NODE_MASK;
- tree->gtFlags &= ~GTF_GLOB_REF;
-
- GenTreePtr parent = fgWalkPre->parentStack->Index(1);
- if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
- {
- tree->gtFlags |= GTF_VAR_DEF;
- tree->gtFlags |= GTF_DONT_CSE;
- }
-#ifdef DEBUG
- if (verbose)
- {
- printf("Replacing the field in promoted struct with a local var:\n");
- fgWalkPre->printModified = true;
- }
-#endif // DEBUG
- return WALK_SKIP_SUBTREES;
- }
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
}
- else
+#ifdef DEBUG
+ if (verbose)
{
- // Normed struct
- // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
- // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
- // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
- // there is one extremely rare case where that won't be true. An enum type is a special value type
- // that contains exactly one element of a primitive integer type (that, for CLS programs is named
- // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
- // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
- // ldfld. For example:
- //
- // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
- // {
- // .field public specialname rtspecialname int16 value__
- // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
- // }
- // .method public hidebysig static void Main() cil managed
- // {
- // .locals init (valuetype mynamespace.e_t V_0)
- // ...
- // ldloca.s V_0
- // ldflda int16 mynamespace.e_t::value__
- // ...
- // }
- //
- // Normally, compilers will not generate the ldflda, since it is superfluous.
- //
- // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
- // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
- // mismatch like this, don't do this morphing. The local var may end up getting marked as
- // address taken, and the appropriate SHORT load will be done from memory in that case.
+ printf("Replacing the field in promoted struct with a local var:\n");
+ fgWalkPre->printModified = true;
+ }
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
+ }
+ }
+ else
+ {
+ // Normed struct
+ // A "normed struct" is a struct that the VM tells us is a basic type. This can only happen if
+ // the struct contains a single element, and that element is 4 bytes (on x64 it can also be 8
+ // bytes). Normally, the type of the local var and the type of GT_FIELD are equivalent. However,
+ // there is one extremely rare case where that won't be true. An enum type is a special value type
+ // that contains exactly one element of a primitive integer type (that, for CLS programs is named
+ // "value__"). The VM tells us that a local var of that enum type is the primitive type of the
+ // enum's single field. It turns out that it is legal for IL to access this field using ldflda or
+ // ldfld. For example:
+ //
+ // .class public auto ansi sealed mynamespace.e_t extends [mscorlib]System.Enum
+ // {
+ // .field public specialname rtspecialname int16 value__
+ // .field public static literal valuetype mynamespace.e_t one = int16(0x0000)
+ // }
+ // .method public hidebysig static void Main() cil managed
+ // {
+ // .locals init (valuetype mynamespace.e_t V_0)
+ // ...
+ // ldloca.s V_0
+ // ldflda int16 mynamespace.e_t::value__
+ // ...
+ // }
+ //
+ // Normally, compilers will not generate the ldflda, since it is superfluous.
+ //
+ // In the example, the lclVar is short, but the JIT promotes all trees using this local to the
+ // "actual type", that is, INT. But the GT_FIELD is still SHORT. So, in the case of a type
+ // mismatch like this, don't do this morphing. The local var may end up getting marked as
+ // address taken, and the appropriate SHORT load will be done from memory in that case.
- if (tree->TypeGet() == obj->TypeGet())
- {
- tree->ChangeOper(GT_LCL_VAR);
- tree->gtLclVarCommon.SetLclNum(lclNum);
- tree->gtFlags &= GTF_NODE_MASK;
+ if (tree->TypeGet() == obj->TypeGet())
+ {
+ tree->ChangeOper(GT_LCL_VAR);
+ tree->gtLclVarCommon.SetLclNum(lclNum);
+ tree->gtFlags &= GTF_NODE_MASK;
- GenTreePtr parent = fgWalkPre->parentStack->Index(1);
- if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
- {
- tree->gtFlags |= GTF_VAR_DEF;
- tree->gtFlags |= GTF_DONT_CSE;
- }
+ GenTreePtr parent = fgWalkPre->parentStack->Index(1);
+ if ((parent->gtOper == GT_ASG) && (parent->gtOp.gtOp1 == tree))
+ {
+ tree->gtFlags |= GTF_VAR_DEF;
+ tree->gtFlags |= GTF_DONT_CSE;
+ }
#ifdef DEBUG
- if (verbose)
- {
- printf("Replacing the field in normed struct with the local var:\n");
- fgWalkPre->printModified = true;
- }
-#endif // DEBUG
- return WALK_SKIP_SUBTREES;
- }
+ if (verbose)
+ {
+ printf("Replacing the field in normed struct with the local var:\n");
+ fgWalkPre->printModified = true;
}
+#endif // DEBUG
+ return WALK_SKIP_SUBTREES;
}
}
}
diff --git a/src/jit/optcse.cpp b/src/jit/optcse.cpp
index 3ff4cea385..5ee6d84920 100644
--- a/src/jit/optcse.cpp
+++ b/src/jit/optcse.cpp
@@ -996,6 +996,17 @@ void Compiler::optValnumCSE_Availablity()
/* This is a CSE def */
+ if (desc->csdDefCount == 0)
+ {
+ // This is the first def visited, so copy its conservative VN
+ desc->defConservativeVN = tree->gtVNPair.GetConservative();
+ }
+ else if (tree->gtVNPair.GetConservative() != desc->defConservativeVN)
+ {
+ // This candidate has defs with differing conservative VNs
+ desc->defConservativeVN = ValueNumStore::NoVN;
+ }
+
desc->csdDefCount += 1;
desc->csdDefWtCnt += stmw;
@@ -1778,6 +1789,8 @@ public:
m_addCSEcount++; // Record that we created a new LclVar for use as a CSE temp
m_pCompiler->optCSEcount++;
+ ValueNum defConservativeVN = successfulCandidate->CseDsc()->defConservativeVN;
+
/* Walk all references to this CSE, adding an assignment
to the CSE temp to all defs and changing all refs to
a simple use of the CSE temp.
@@ -1890,6 +1903,13 @@ public:
//
cse = m_pCompiler->gtNewLclvNode(cseLclVarNum, cseLclVarTyp);
cse->gtVNPair = exp->gtVNPair; // assign the proper Value Numbers
+ if (defConservativeVN != ValueNumStore::NoVN)
+ {
+ // All defs of this CSE share the same conservative VN, and we are rewriting this
+ // use to fetch the same value with no reload, so we can safely propagate that
+ // conservative VN to this use. This can help range check elimination later on.
+ cse->gtVNPair.SetConservative(defConservativeVN);
+ }
#ifdef DEBUG
cse->gtDebugFlags |= GTF_DEBUG_VAR_CSE_REF;
#endif // DEBUG
diff --git a/src/jit/optimizer.cpp b/src/jit/optimizer.cpp
index bd82f6a6f3..92edf62890 100644
--- a/src/jit/optimizer.cpp
+++ b/src/jit/optimizer.cpp
@@ -1193,7 +1193,10 @@ void Compiler::optRecordLoop(BasicBlock* head,
optLoopTable[loopInd].lpFlags = 0;
// We haven't yet recorded any side effects.
- optLoopTable[loopInd].lpLoopHasHeapHavoc = false;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ optLoopTable[loopInd].lpLoopHasMemoryHavoc[memoryKind] = false;
+ }
optLoopTable[loopInd].lpFieldsModified = nullptr;
optLoopTable[loopInd].lpArrayElemTypesModified = nullptr;
@@ -6397,7 +6400,7 @@ bool Compiler::optVNIsLoopInvariant(ValueNum vn, unsigned lnum, VNToBoolMap* loo
res = !optLoopContains(lnum, ssaDef->m_defLoc.m_blk->bbNatLoopNum);
}
}
- else if (funcApp.m_func == VNF_PhiHeapDef)
+ else if (funcApp.m_func == VNF_PhiMemoryDef)
{
BasicBlock* defnBlk = reinterpret_cast<BasicBlock*>(vnStore->ConstantValue<ssize_t>(funcApp.m_args[0]));
res = !optLoopContains(lnum, defnBlk->bbNatLoopNum);
@@ -6837,7 +6840,8 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
AddVariableLivenessAllContainingLoops(mostNestedLoop, blk);
- bool heapHavoc = false; // True ==> there's a call or a memory store that has arbitrary heap effects.
+ // MemoryKinds for which an in-loop call or store has arbitrary effects.
+ MemoryKindSet memoryHavoc = emptyMemoryKindSet;
// Now iterate over the remaining statements, and their trees.
for (GenTreePtr stmts = blk->FirstNonPhiDef(); (stmts != nullptr); stmts = stmts->gtNext)
@@ -6846,8 +6850,8 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
{
genTreeOps oper = tree->OperGet();
- // Even after we set heapHavoc we still may want to know if a loop contains calls
- if (heapHavoc)
+ // Even after we set memoryHavoc we still may want to know if a loop contains calls
+ if (memoryHavoc == fullMemoryKindSet)
{
if (oper == GT_CALL)
{
@@ -6858,18 +6862,18 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
// If we just set lpContainsCall or it was previously set
if (optLoopTable[mostNestedLoop].lpContainsCall)
{
- // We can early exit after both heapHavoc and lpContainsCall are both set to true.
+ // We can exit early once both memoryHavoc and lpContainsCall are set to true.
break;
}
- // We are just looking for GT_CALL nodes after heapHavoc was set.
+ // We are just looking for GT_CALL nodes after memoryHavoc was set.
continue;
}
- // otherwise heapHavoc is not set
- assert(!heapHavoc);
+ // otherwise memoryHavoc is not set for at least one memory kind
+ assert(memoryHavoc != fullMemoryKindSet);
- // This body is a distillation of the heap-side effect code of value numbering.
+ // This body is a distillation of the memory side-effect code of value numbering.
// We also do a very limited analysis of byref PtrTo values, to cover some cases
// that the compiler creates.
@@ -6884,7 +6888,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
{
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
continue;
}
@@ -6906,12 +6910,14 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
CORINFO_CLASS_HANDLE elemType =
CORINFO_CLASS_HANDLE(vnStore->ConstantValue<size_t>(funcApp.m_args[0]));
AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemType);
- // Don't set heapHavoc below.
+ // Don't set memoryHavoc for GcHeap below. Do set memoryHavoc for ByrefExposed
+ // (conservatively assuming that a byref may alias the array element)
+ memoryHavoc |= memoryKindSet(ByrefExposed);
continue;
}
}
// Otherwise...
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
// Is the LHS an array index expression?
else if (lhs->ParseArrayElemForm(this, &arrInfo, &fldSeqArrElem))
@@ -6920,6 +6926,8 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
// field of "S", will lose all information about the array type.
CORINFO_CLASS_HANDLE elemTypeEq = EncodeElemType(arrInfo.m_elemType, arrInfo.m_elemStructType);
AddModifiedElemTypeAllContainingLoops(mostNestedLoop, elemTypeEq);
+ // Conservatively assume byrefs may alias this array element
+ memoryHavoc |= memoryKindSet(ByrefExposed);
}
else
{
@@ -6932,7 +6940,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq) &&
(fldSeq != FieldSeqStore::NotAField()))
{
- // Get the first (object) field from field seq. Heap[field] will yield the "field map".
+ // Get the first (object) field from field seq. GcHeap[field] will yield the "field map".
assert(fldSeq != nullptr);
if (fldSeq->IsFirstElemFieldSeq())
{
@@ -6941,10 +6949,12 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
}
AddModifiedFieldAllContainingLoops(mostNestedLoop, fldSeq->m_fieldHnd);
+ // Conservatively assume byrefs may alias this object.
+ memoryHavoc |= memoryKindSet(ByrefExposed);
}
else
{
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
}
}
@@ -6954,13 +6964,19 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
bool isEntire;
if (!tree->DefinesLocal(this, &lclVarTree, &isEntire))
{
- // For now, assume arbitrary side effects on the heap...
- heapHavoc = true;
+ // For now, assume arbitrary side effects on GcHeap/ByrefExposed...
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
+ }
+ else if (lvaVarAddrExposed(lclVarTree->gtLclNum))
+ {
+ memoryHavoc |= memoryKindSet(ByrefExposed);
}
}
else if (lhs->OperGet() == GT_CLS_VAR)
{
AddModifiedFieldAllContainingLoops(mostNestedLoop, lhs->gtClsVar.gtClsVarHnd);
+ // Conservatively assume byrefs may alias this static field
+ memoryHavoc |= memoryKindSet(ByrefExposed);
}
// Otherwise, must be local lhs form. I should assert that.
else if (lhs->OperGet() == GT_LCL_VAR)
@@ -6979,6 +6995,11 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
->m_vnPair.SetLiberal(rhsVN);
}
}
+ // If the local is address-exposed, count this as ByrefExposed havoc
+ if (lvaVarAddrExposed(lhsLcl->gtLclNum))
+ {
+ memoryHavoc |= memoryKindSet(ByrefExposed);
+ }
}
}
else // not GenTree::OperIsAssignment(oper)
@@ -7019,7 +7040,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
case GT_XCHG: // Binop
case GT_CMPXCHG: // Specialop
{
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
@@ -7035,7 +7056,7 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
CorInfoHelpFunc helpFunc = eeGetHelperNum(call->gtCallMethHnd);
if (s_helperCallProperties.MutatesHeap(helpFunc))
{
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
else if (s_helperCallProperties.MayRunCctor(helpFunc))
{
@@ -7045,33 +7066,39 @@ void Compiler::optComputeLoopSideEffectsOfBlock(BasicBlock* blk)
// and might have arbitrary side effects.
if ((tree->gtFlags & GTF_CALL_HOISTABLE) == 0)
{
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
}
}
else
{
- heapHavoc = true;
+ memoryHavoc |= memoryKindSet(GcHeap, ByrefExposed);
}
break;
}
default:
- // All other gtOper node kinds, leave 'heapHavoc' unchanged (i.e. false)
+ // All other gtOper node kinds, leave 'memoryHavoc' unchanged (i.e. empty)
break;
}
}
}
}
- if (heapHavoc)
+ if (memoryHavoc != emptyMemoryKindSet)
{
- // Record that all loops containing this block have heap havoc effects.
+ // Record that all loops containing this block have memory havoc effects.
unsigned lnum = mostNestedLoop;
while (lnum != BasicBlock::NOT_IN_LOOP)
{
- optLoopTable[lnum].lpLoopHasHeapHavoc = true;
- lnum = optLoopTable[lnum].lpParent;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ if ((memoryHavoc & memoryKindSet(memoryKind)) != 0)
+ {
+ optLoopTable[lnum].lpLoopHasMemoryHavoc[memoryKind] = true;
+ }
+ }
+ lnum = optLoopTable[lnum].lpParent;
}
}
}
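For readers following the heapHavoc -> memoryHavoc change: the loop code above treats the havoc state as a small bit set over memory kinds rather than a single bool. A hedged sketch of that shape, with illustrative names (the real MemoryKind/MemoryKindSet definitions live elsewhere in the compiler sources):

    // Illustrative memory-kind set: one bit per kind, so havoc can be recorded for
    // GcHeap and ByrefExposed independently.
    enum MemoryKindSketch
    {
        GcHeapSketch       = 0,
        ByrefExposedSketch = 1,
        MemoryKindCount    = 2
    };
    typedef unsigned MemoryKindSetSketch;

    inline MemoryKindSetSketch kindSet(MemoryKindSketch k) { return 1u << k; }
    inline MemoryKindSetSketch kindSet(MemoryKindSketch k1, MemoryKindSketch k2)
    {
        return kindSet(k1) | kindSet(k2);
    }

    const MemoryKindSetSketch emptySet = 0;
    const MemoryKindSetSketch fullSet  = kindSet(GcHeapSketch, ByrefExposedSketch);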
@@ -7426,57 +7453,6 @@ GenTreePtr Compiler::optFindLocalInit(BasicBlock* block,
return rhs;
}
-/*****************************************************************************
- *
- * Return true if "op1" is guaranteed to be less then or equal to "op2".
- */
-
-#if FANCY_ARRAY_OPT
-
-bool Compiler::optIsNoMore(GenTreePtr op1, GenTreePtr op2, int add1, int add2)
-{
- if (op1->gtOper == GT_CNS_INT && op2->gtOper == GT_CNS_INT)
- {
- add1 += op1->gtIntCon.gtIconVal;
- add2 += op2->gtIntCon.gtIconVal;
- }
- else
- {
- /* Check for +/- constant on either operand */
-
- if (op1->gtOper == GT_ADD && op1->gtOp.gtOp2->gtOper == GT_CNS_INT)
- {
- add1 += op1->gtOp.gtOp2->gtIntCon.gtIconVal;
- op1 = op1->gtOp.gtOp1;
- }
-
- if (op2->gtOper == GT_ADD && op2->gtOp.gtOp2->gtOper == GT_CNS_INT)
- {
- add2 += op2->gtOp.gtOp2->gtIntCon.gtIconVal;
- op2 = op2->gtOp.gtOp1;
- }
-
- /* We only allow local variable references */
-
- if (op1->gtOper != GT_LCL_VAR)
- return false;
- if (op2->gtOper != GT_LCL_VAR)
- return false;
- if (op1->gtLclVarCommon.gtLclNum != op2->gtLclVarCommon.gtLclNum)
- return false;
-
- /* NOTE: Caller ensures that this variable has only one def */
-
- // printf("limit [%d]:\n", add1); gtDispTree(op1);
- // printf("size [%d]:\n", add2); gtDispTree(op2);
- // printf("\n");
- }
-
- return (bool)(add1 <= add2);
-}
-
-#endif
-
//------------------------------------------------------------------------------
// optObtainLoopCloningOpts: Identify optimization candidates and update
// the "context" for array optimizations.
@@ -7662,11 +7638,11 @@ bool Compiler::optExtractArrIndex(GenTreePtr tree, ArrIndex* result, unsigned lh
return false;
}
GenTreeBoundsChk* arrBndsChk = before->AsBoundsChk();
- if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
+ if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
{
return false;
}
- if (arrBndsChk->gtIndex->gtOper != GT_LCL_VAR)
+ if (arrBndsChk->gtArrLen->gtGetOp1()->gtOper != GT_LCL_VAR)
{
return false;
}
diff --git a/src/jit/rationalize.cpp b/src/jit/rationalize.cpp
index 7f5a26fa1f..00e0bec6f7 100644
--- a/src/jit/rationalize.cpp
+++ b/src/jit/rationalize.cpp
@@ -7,6 +7,7 @@
#pragma hdrstop
#endif
+#ifndef LEGACY_BACKEND
// state carried over the tree walk, to be used in making
// a splitting decision.
struct SplitData
@@ -116,17 +117,14 @@ void Rationalizer::RewriteSIMDOperand(LIR::Use& use, bool keepBlk)
addr->gtType = simdType;
use.ReplaceWith(comp, addr);
}
-#if defined(_TARGET_X86_)
- // For x86, if we have GT_IND(GT_ADDR(GT_SIMD)), remove the GT_IND(GT_ADDR()), leaving just
- // the GT_SIMD.
else if ((addr->OperGet() == GT_ADDR) && (addr->gtGetOp1()->OperGet() == GT_SIMD))
{
+ // if we have GT_IND(GT_ADDR(GT_SIMD)), remove the GT_IND(GT_ADDR()), leaving just the GT_SIMD.
BlockRange().Remove(tree);
BlockRange().Remove(addr);
use.ReplaceWith(comp, addr->gtGetOp1());
}
-#endif // defined(_TARGET_X86_)
else if (!keepBlk)
{
tree->SetOper(GT_IND);
@@ -729,6 +727,11 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
case GT_IND:
// Clear the `GTF_IND_ASG_LHS` flag, which overlaps with `GTF_IND_REQ_ADDR_IN_REG`.
node->gtFlags &= ~GTF_IND_ASG_LHS;
+
+ if (varTypeIsSIMD(node))
+ {
+ RewriteSIMDOperand(use, false);
+ }
break;
case GT_NOP:
@@ -795,7 +798,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
BlockRange().Remove(node);
break;
-#ifdef _TARGET_XARCH_
+#if defined(_TARGET_XARCH_) || defined(_TARGET_ARM_)
case GT_CLS_VAR:
{
// Class vars that are the target of an assignment will get rewritten into
@@ -910,7 +913,7 @@ Compiler::fgWalkResult Rationalizer::RewriteNode(GenTree** useEdge, ArrayStack<G
op1->gtType = simdType;
}
- GenTree* op2 = simdNode->gtGetOp2();
+ GenTree* op2 = simdNode->gtGetOp2IfPresent();
if (op2 != nullptr && op2->gtType == TYP_STRUCT)
{
op2->gtType = simdType;
@@ -1052,3 +1055,4 @@ void Rationalizer::DoPhase()
comp->compRationalIRForm = true;
}
+#endif // LEGACY_BACKEND
diff --git a/src/jit/regalloc.cpp b/src/jit/regalloc.cpp
index 8a7ad5a163..5c3895b4f2 100644
--- a/src/jit/regalloc.cpp
+++ b/src/jit/regalloc.cpp
@@ -1724,7 +1724,7 @@ regMaskTP Compiler::rpPredictBlkAsgRegUse(GenTreePtr tree,
bool useBarriers = false;
GenTreeBlk* dst = tree->gtGetOp1()->AsBlk();
GenTreePtr dstAddr = dst->Addr();
- GenTreePtr srcAddrOrFill = tree->gtGetOp2();
+ GenTreePtr srcAddrOrFill = tree->gtGetOp2IfPresent();
size_t blkSize = dst->gtBlkSize;
@@ -2478,7 +2478,7 @@ regMaskTP Compiler::rpPredictTreeRegUse(GenTreePtr tree,
if (kind & GTK_SMPOP)
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
GenTreePtr opsPtr[3];
regMaskTP regsPtr[3];
diff --git a/src/jit/registerfp.cpp b/src/jit/registerfp.cpp
index 3a3143e629..ed71886cae 100644
--- a/src/jit/registerfp.cpp
+++ b/src/jit/registerfp.cpp
@@ -243,7 +243,7 @@ void CodeGen::genFloatSimple(GenTree* tree, RegSet::RegisterPreference* pref)
case GT_COMMA:
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
if (tree->gtFlags & GTF_REVERSE_OPS)
{
@@ -318,7 +318,7 @@ void CodeGen::genFloatAssign(GenTree* tree)
{
var_types type = tree->TypeGet();
GenTreePtr op1 = tree->gtGetOp1();
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
regMaskTP needRegOp1 = RBM_ALLINT;
regMaskTP addrReg = RBM_NONE;
@@ -846,7 +846,7 @@ void CodeGen::genFloatArith(GenTreePtr tree, RegSet::RegisterPreference* tgtPref
var_types type = tree->TypeGet();
genTreeOps oper = tree->OperGet();
GenTreePtr op1 = tree->gtGetOp1();
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
regNumber tgtReg;
unsigned varNum;
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index 39664c47bf..fb190c4fa1 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -347,6 +347,7 @@ var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, u
}
*sizeBytes = size;
+ setUsesSIMDTypes(true);
}
return simdBaseType;
@@ -426,16 +427,6 @@ const SIMDIntrinsicInfo* Compiler::getSIMDIntrinsicInfo(CORINFO_CLASS_HANDLE* in
return nullptr;
}
-#ifdef _TARGET_X86_
- // NYI: support LONG type SIMD intrinsics. Need support in long decomposition.
- // (Don't use NYI fallback mechanism; just call the function.)
- if ((*baseType == TYP_LONG) || (*baseType == TYP_ULONG))
- {
- JITDUMP("NYI: x86 long base type SIMD intrinsics\n");
- return nullptr;
- }
-#endif // _TARGET_X86_
-
// account for implicit "this" arg
*argCount = sig->numArgs;
if (sig->hasThis())
@@ -1156,6 +1147,154 @@ SIMDIntrinsicID Compiler::impSIMDRelOp(SIMDIntrinsicID relOpIntrinsicId,
#endif // !_TARGET_XARCH_
}
+//-------------------------------------------------------------------------
+// impSIMDAbs: creates a GT_SIMD node that computes the Abs value of a given vector.
+//
+// Arguments:
+// typeHnd - type handle of SIMD vector
+// baseType - base type of vector
+// size - vector size in bytes
+// op1 - operand of Abs intrinsic
+//
+GenTreePtr Compiler::impSIMDAbs(CORINFO_CLASS_HANDLE typeHnd, var_types baseType, unsigned size, GenTree* op1)
+{
+ assert(varTypeIsSIMD(op1));
+
+ var_types simdType = op1->TypeGet();
+ GenTreePtr retVal = nullptr;
+
+#ifdef _TARGET_XARCH_
+ // When there is no direct support, Abs(v) could be computed
+ // on integer vectors as follows:
+ // BitVector = v < vector.Zero
+ // result = ConditionalSelect(BitVector, vector.Zero - v, v)
+
+ bool useConditionalSelect = false;
+ if (getSIMDInstructionSet() == InstructionSet_SSE2)
+ {
+ // SSE2 doesn't support abs on signed integer type vectors.
+ if (baseType == TYP_LONG || baseType == TYP_INT || baseType == TYP_SHORT || baseType == TYP_BYTE)
+ {
+ useConditionalSelect = true;
+ }
+ }
+ else
+ {
+ assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ if (baseType == TYP_LONG)
+ {
+ // SSE3_4/AVX2 don't support abs on long type vector.
+ useConditionalSelect = true;
+ }
+ }
+
+ if (useConditionalSelect)
+ {
+ // This works only on integer vectors, not on float/double vectors.
+ assert(varTypeIsIntegral(baseType));
+
+ GenTreePtr op1Assign;
+ unsigned op1LclNum;
+
+ if (op1->OperGet() == GT_LCL_VAR)
+ {
+ op1LclNum = op1->gtLclVarCommon.gtLclNum;
+ op1Assign = nullptr;
+ }
+ else
+ {
+ op1LclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs op1"));
+ lvaSetStruct(op1LclNum, typeHnd, false);
+ op1Assign = gtNewTempAssign(op1LclNum, op1);
+ op1 = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ }
+
+ // Assign Vector.Zero to a temp since it is needed more than once
+ GenTreePtr vecZero = gtNewSIMDVectorZero(simdType, baseType, size);
+ unsigned vecZeroLclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs VecZero"));
+ lvaSetStruct(vecZeroLclNum, typeHnd, false);
+ GenTreePtr vecZeroAssign = gtNewTempAssign(vecZeroLclNum, vecZero);
+
+ // Construct BitVector = v < vector.Zero
+ GenTreePtr bitVecOp1 = op1;
+ GenTreePtr bitVecOp2 = gtNewLclvNode(vecZeroLclNum, vecZero->TypeGet());
+ var_types relOpBaseType = baseType;
+ SIMDIntrinsicID relOpIntrinsic =
+ impSIMDRelOp(SIMDIntrinsicLessThan, typeHnd, size, &relOpBaseType, &bitVecOp1, &bitVecOp2);
+ GenTreePtr bitVec = gtNewSIMDNode(simdType, bitVecOp1, bitVecOp2, relOpIntrinsic, relOpBaseType, size);
+ unsigned bitVecLclNum = lvaGrabTemp(true DEBUGARG("SIMD Abs bitVec"));
+ lvaSetStruct(bitVecLclNum, typeHnd, false);
+ GenTreePtr bitVecAssign = gtNewTempAssign(bitVecLclNum, bitVec);
+ bitVec = gtNewLclvNode(bitVecLclNum, bitVec->TypeGet());
+
+ // Construct condSelectOp1 = vector.Zero - v
+ GenTreePtr subOp1 = gtNewLclvNode(vecZeroLclNum, vecZero->TypeGet());
+ GenTreePtr subOp2 = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ GenTreePtr negVec = gtNewSIMDNode(simdType, subOp1, subOp2, SIMDIntrinsicSub, baseType, size);
+
+ // Construct ConditionalSelect(bitVec, vector.Zero - v, v)
+ GenTreePtr vec = gtNewLclvNode(op1LclNum, op1->TypeGet());
+ retVal = impSIMDSelect(typeHnd, baseType, size, bitVec, negVec, vec);
+
+ // Prepend bitVec assignment to retVal.
+ // retVal = (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v)
+ retVal = gtNewOperNode(GT_COMMA, simdType, bitVecAssign, retVal);
+
+ // Prepend vecZero assignment to retVal.
+ // retVal = (tmp1 = vector.Zero), (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v)
+ retVal = gtNewOperNode(GT_COMMA, simdType, vecZeroAssign, retVal);
+
+ // If op1 was assigned to a temp, prepend that to retVal.
+ if (op1Assign != nullptr)
+ {
+ // retVal = (v=op1), (tmp1 = vector.Zero), (tmp2 = v < tmp1), CondSelect(tmp2, tmp1 - v, v)
+ retVal = gtNewOperNode(GT_COMMA, simdType, op1Assign, retVal);
+ }
+ }
+ else if (varTypeIsFloating(baseType))
+ {
+ // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
+        // Abs(vd) = vd & new SIMDVector<double>(0x7fffffffffffffff);
+ GenTree* bitMask = nullptr;
+ if (baseType == TYP_FLOAT)
+ {
+ float f;
+ static_assert_no_msg(sizeof(float) == sizeof(int));
+ *((int*)&f) = 0x7fffffff;
+ bitMask = gtNewDconNode(f);
+ }
+ else if (baseType == TYP_DOUBLE)
+ {
+ double d;
+ static_assert_no_msg(sizeof(double) == sizeof(__int64));
+ *((__int64*)&d) = 0x7fffffffffffffffLL;
+ bitMask = gtNewDconNode(d);
+ }
+
+ assert(bitMask != nullptr);
+ bitMask->gtType = baseType;
+ GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size);
+ retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size);
+ }
+ else if (baseType == TYP_CHAR || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG)
+ {
+ // Abs is a no-op on unsigned integer type vectors
+ retVal = op1;
+ }
+ else
+ {
+ assert(getSIMDInstructionSet() >= InstructionSet_SSE3_4);
+ assert(baseType != TYP_LONG);
+
+ retVal = gtNewSIMDNode(simdType, op1, SIMDIntrinsicAbs, baseType, size);
+ }
+#else // !_TARGET_XARCH_
+ assert(!"Abs intrinsic on non-xarch target not implemented");
+#endif // !_TARGET_XARCH_
+
+ return retVal;
+}
+
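The two strategies impSIMDAbs encodes can be illustrated with a small standalone SSE2 sketch (this only illustrates the identities, not the trees the importer actually builds; it assumes <emmintrin.h> and 4-lane int/float vectors):

#include <emmintrin.h>

// Integer lanes without a hardware abs: bitVec = (v < 0); result = select(bitVec, 0 - v, v).
static __m128i abs_epi32_via_select(__m128i v)
{
    __m128i zero   = _mm_setzero_si128();
    __m128i bitVec = _mm_cmplt_epi32(v, zero);  // all-ones in lanes where v < 0
    __m128i negV   = _mm_sub_epi32(zero, v);    // 0 - v
    // ConditionalSelect(bitVec, negV, v) == (bitVec & negV) | (~bitVec & v)
    return _mm_or_si128(_mm_and_si128(bitVec, negV), _mm_andnot_si128(bitVec, v));
}

// Float lanes: clear the sign bit with a 0x7fffffff mask replicated across the vector.
static __m128 abs_ps_via_mask(__m128 v)
{
    return _mm_and_ps(v, _mm_castsi128_ps(_mm_set1_epi32(0x7fffffff)));
}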
// Creates a GT_SIMD tree for Select operation
//
// Arguments:
@@ -1645,7 +1784,7 @@ GenTreePtr Compiler::createAddressNodeForSIMDInit(GenTreePtr tree, unsigned simd
GenTreeArrLen* arrLen =
new (this, GT_ARR_LENGTH) GenTreeArrLen(TYP_INT, arrayRef, (int)offsetof(CORINFO_Array, length));
GenTreeBoundsChk* arrBndsChk = new (this, GT_ARR_BOUNDS_CHECK)
- GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, checkIndexExpr, SCK_RNGCHK_FAIL);
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, checkIndexExpr, arrLen, SCK_RNGCHK_FAIL);
offset += offsetof(CORINFO_Array, u1Elems);
byrefNode = gtNewOperNode(GT_COMMA, arrayRef->TypeGet(), arrBndsChk, gtCloneExpr(arrayRef));
@@ -1820,43 +1959,12 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
break;
case SIMDIntrinsicGetZero:
- {
- baseType = genActualType(baseType);
- GenTree* initVal = gtNewZeroConNode(baseType);
- initVal->gtType = baseType;
- simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
- retVal = simdTree;
- }
- break;
+ retVal = gtNewSIMDVectorZero(simdType, baseType, size);
+ break;
case SIMDIntrinsicGetOne:
- {
- GenTree* initVal;
- if (varTypeIsSmallInt(baseType))
- {
- unsigned baseSize = genTypeSize(baseType);
- int val;
- if (baseSize == 1)
- {
- val = 0x01010101;
- }
- else
- {
- val = 0x00010001;
- }
- initVal = gtNewIconNode(val);
- }
- else
- {
- initVal = gtNewOneConNode(baseType);
- }
-
- baseType = genActualType(baseType);
- initVal->gtType = baseType;
- simdTree = gtNewSIMDNode(simdType, initVal, nullptr, SIMDIntrinsicInit, baseType, size);
- retVal = simdTree;
- }
- break;
+ retVal = gtNewSIMDVectorOne(simdType, baseType, size);
+ break;
case SIMDIntrinsicGetAllOnes:
{
@@ -2130,7 +2238,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
GenTreeArrLen* arrLen = new (this, GT_ARR_LENGTH)
GenTreeArrLen(TYP_INT, arrayRefForArgRngChk, (int)offsetof(CORINFO_Array, length));
argRngChk = new (this, GT_ARR_BOUNDS_CHECK)
- GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, index, op3CheckKind);
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, index, arrLen, op3CheckKind);
// Now, clone op3 to create another node for the argChk
GenTree* index2 = gtCloneExpr(op3);
assert(index != nullptr);
@@ -2151,7 +2259,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
GenTreeArrLen* arrLen = new (this, GT_ARR_LENGTH)
GenTreeArrLen(TYP_INT, arrayRefForArgChk, (int)offsetof(CORINFO_Array, length));
GenTreeBoundsChk* argChk = new (this, GT_ARR_BOUNDS_CHECK)
- GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, arrLen, checkIndexExpr, op2CheckKind);
+ GenTreeBoundsChk(GT_ARR_BOUNDS_CHECK, TYP_VOID, checkIndexExpr, arrLen, op2CheckKind);
// Create a GT_COMMA tree for the bounds check(s).
op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), argChk, op2);
@@ -2383,7 +2491,7 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
GenTree* lengthNode = new (this, GT_CNS_INT) GenTreeIntCon(TYP_INT, vectorLength);
GenTreeBoundsChk* simdChk =
- new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, lengthNode, index, SCK_RNGCHK_FAIL);
+ new (this, GT_SIMD_CHK) GenTreeBoundsChk(GT_SIMD_CHK, TYP_VOID, index, lengthNode, SCK_RNGCHK_FAIL);
// Create a GT_COMMA tree for the bounds check.
op2 = gtNewOperNode(GT_COMMA, op2->TypeGet(), simdChk, op2);
@@ -2443,54 +2551,9 @@ GenTreePtr Compiler::impSIMDIntrinsic(OPCODE opcode,
break;
case SIMDIntrinsicAbs:
- {
- op1 = impSIMDPopStack(simdType);
-
-#ifdef _TARGET_XARCH_
- if (varTypeIsFloating(baseType))
- {
- // Abs(vf) = vf & new SIMDVector<float>(0x7fffffff);
- // Abs(vd) = vf & new SIMDVector<double>(0x7fffffffffffffff);
- GenTree* bitMask = nullptr;
- if (baseType == TYP_FLOAT)
- {
- float f;
- static_assert_no_msg(sizeof(float) == sizeof(int));
- *((int*)&f) = 0x7fffffff;
- bitMask = gtNewDconNode(f);
- }
- else if (baseType == TYP_DOUBLE)
- {
- double d;
- static_assert_no_msg(sizeof(double) == sizeof(__int64));
- *((__int64*)&d) = 0x7fffffffffffffffLL;
- bitMask = gtNewDconNode(d);
- }
-
- assert(bitMask != nullptr);
- bitMask->gtType = baseType;
- GenTree* bitMaskVector = gtNewSIMDNode(simdType, bitMask, SIMDIntrinsicInit, baseType, size);
- retVal = gtNewSIMDNode(simdType, op1, bitMaskVector, SIMDIntrinsicBitwiseAnd, baseType, size);
- }
- else if (baseType == TYP_CHAR || baseType == TYP_UBYTE || baseType == TYP_UINT || baseType == TYP_ULONG)
- {
- // Abs is a no-op on unsigned integer type vectors
- retVal = op1;
- }
- else
- {
- // SSE/AVX doesn't support abs on signed integer vectors and hence
- // should never be seen as an intrinsic here. See SIMDIntrinsicList.h
- // for supported base types for this intrinsic.
- unreached();
- }
-
-#else // !_TARGET_XARCH_
- assert(!"Abs intrinsic on non-xarch target not implemented");
- unreached();
-#endif // !_TARGET_XARCH_
- }
- break;
+ op1 = impSIMDPopStack(simdType);
+ retVal = impSIMDAbs(clsHnd, baseType, size, op1);
+ break;
case SIMDIntrinsicGetW:
retVal = impSIMDGetFixed(simdType, baseType, size, 3);
diff --git a/src/jit/simdcodegenxarch.cpp b/src/jit/simdcodegenxarch.cpp
index ec933fd5d7..ace36422fb 100644
--- a/src/jit/simdcodegenxarch.cpp
+++ b/src/jit/simdcodegenxarch.cpp
@@ -75,22 +75,20 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
result = INS_vbroadcastsd;
break;
case TYP_ULONG:
- __fallthrough;
case TYP_LONG:
+ // NOTE: for x86, this instruction is valid if the src is xmm2/m64, but NOT if it is supposed
+ // to be TYP_LONG reg.
result = INS_vpbroadcastq;
break;
case TYP_UINT:
- __fallthrough;
case TYP_INT:
result = INS_vpbroadcastd;
break;
case TYP_CHAR:
- __fallthrough;
case TYP_SHORT:
result = INS_vpbroadcastw;
break;
case TYP_UBYTE:
- __fallthrough;
case TYP_BYTE:
result = INS_vpbroadcastb;
break;
@@ -99,8 +97,10 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
}
break;
}
+
// For SSE, SIMDIntrinsicInit uses the same instruction as the SIMDIntrinsicShuffleSSE2 intrinsic.
__fallthrough;
+
case SIMDIntrinsicShuffleSSE2:
if (baseType == TYP_FLOAT)
{
@@ -116,7 +116,7 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
}
else if (baseType == TYP_LONG || baseType == TYP_ULONG)
{
- // We don't have a seperate SSE2 instruction and will
+ // We don't have a separate SSE2 instruction and will
// use the instruction meant for doubles since it is
// of the same size as a long.
result = INS_shufpd;
@@ -272,6 +272,24 @@ instruction CodeGen::getOpForSIMDIntrinsic(SIMDIntrinsicID intrinsicId, var_type
}
break;
+ case SIMDIntrinsicAbs:
+ if (compiler->getSIMDInstructionSet() >= InstructionSet_SSE3_4)
+ {
+ if (baseType == TYP_INT)
+ {
+ result = INS_pabsd;
+ }
+ else if (baseType == TYP_SHORT)
+ {
+ result = INS_pabsw;
+ }
+ else if (baseType == TYP_BYTE)
+ {
+ result = INS_pabsb;
+ }
+ }
+ break;
+
case SIMDIntrinsicEqual:
if (baseType == TYP_FLOAT)
{
@@ -601,7 +619,73 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
noway_assert(!varTypeIsSmallInt(baseType) || op1->IsIntegralConst(0));
instruction ins = INS_invalid;
- if (op1->isContained())
+
+#if !defined(_TARGET_64BIT_)
+ if (op1->OperGet() == GT_LONG)
+ {
+ assert(varTypeIsLong(baseType));
+
+ GenTree* op1lo = op1->gtGetOp1();
+ GenTree* op1hi = op1->gtGetOp2();
+
+ if (op1lo->IsIntegralConst(0) && op1hi->IsIntegralConst(0))
+ {
+ genSIMDZero(targetType, baseType, targetReg);
+ }
+ else if (op1lo->IsIntegralConst(-1) && op1hi->IsIntegralConst(-1))
+ {
+ // Initialize elements of vector with all 1's: generate pcmpeqd reg, reg.
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicEqual, TYP_INT);
+ inst_RV_RV(ins, targetReg, targetReg, targetType, emitActualTypeSize(targetType));
+ }
+ else
+ {
+ // Generate:
+ // mov_i2xmm targetReg, op1lo
+ // mov_i2xmm xmmtmp, op1hi
+ // shl xmmtmp, 4 bytes
+ // por targetReg, xmmtmp
+ // Now, targetReg has the long in the low 64 bits. For SSE2, move it to the high 64 bits using:
+ // shufpd targetReg, targetReg, 0 // move the long to all the lanes
+ // For AVX2, move it to all 4 of the 64-bit lanes using:
+ // vpbroadcastq targetReg, targetReg
+
+ instruction ins;
+
+ regNumber op1loReg = genConsumeReg(op1lo);
+ ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT);
+ inst_RV_RV(ins, targetReg, op1loReg, TYP_INT, emitTypeSize(TYP_INT));
+
+ assert(simdNode->gtRsvdRegs != RBM_NONE);
+ assert(genCountBits(simdNode->gtRsvdRegs) == 1);
+ regNumber tmpReg = genRegNumFromMask(simdNode->gtRsvdRegs);
+
+ regNumber op1hiReg = genConsumeReg(op1hi);
+ ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT);
+ inst_RV_RV(ins, tmpReg, op1hiReg, TYP_INT, emitTypeSize(TYP_INT));
+
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, baseType);
+ getEmitter()->emitIns_R_I(ins, EA_16BYTE, tmpReg, 4); // shift left by 4 bytes
+
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicBitwiseOr, baseType);
+ inst_RV_RV(ins, targetReg, tmpReg, targetType, emitActualTypeSize(targetType));
+
+#ifdef FEATURE_AVX_SUPPORT
+ if (compiler->canUseAVX())
+ {
+ inst_RV_RV(INS_vpbroadcastq, targetReg, targetReg, TYP_SIMD32, emitTypeSize(TYP_SIMD32));
+ }
+ else
+#endif // FEATURE_AVX_SUPPORT
+ {
+ ins = getOpForSIMDIntrinsic(SIMDIntrinsicShuffleSSE2, baseType);
+ getEmitter()->emitIns_R_R_I(ins, emitActualTypeSize(targetType), targetReg, targetReg, 0);
+ }
+ }
+ }
+ else
+#endif // !defined(_TARGET_64BIT_)
+ if (op1->isContained())
{
if (op1->IsIntegralConst(0) || op1->IsFPZero())
{
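The comment block above describes the 32-bit long-typed vector init sequence; roughly the same dataflow at the intrinsics level looks like this (illustrative only, assuming SSE2 via <emmintrin.h>, with the long supplied as two 32-bit halves):

#include <emmintrin.h>

static __m128i init_vector_long_x86(int lo, int hi)
{
    __m128i vlo = _mm_cvtsi32_si128(lo);   // mov_i2xmm targetReg, op1lo
    __m128i vhi = _mm_cvtsi32_si128(hi);   // mov_i2xmm xmmtmp,   op1hi
    vhi         = _mm_slli_si128(vhi, 4);  // shift xmmtmp left by 4 bytes
    __m128i v   = _mm_or_si128(vlo, vhi);  // por: the long now sits in the low 64 bits
    // SSE2 path ("shufpd targetReg, targetReg, 0"): replicate the low 64 bits to both lanes;
    // when AVX is available, the code above uses vpbroadcastq instead.
    return _mm_shuffle_epi32(v, 0x44);     // 0x44 selects lanes {1,0,1,0}
}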
@@ -811,7 +895,8 @@ void CodeGen::genSIMDIntrinsicInitN(GenTreeSIMD* simdNode)
//
void CodeGen::genSIMDIntrinsicUnOp(GenTreeSIMD* simdNode)
{
- assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast);
+ assert(simdNode->gtSIMDIntrinsicID == SIMDIntrinsicSqrt || simdNode->gtSIMDIntrinsicID == SIMDIntrinsicCast ||
+ simdNode->gtSIMDIntrinsicID == SIMDIntrinsicAbs);
GenTree* op1 = simdNode->gtGetOp1();
var_types baseType = simdNode->gtSIMDBaseType;
@@ -1665,6 +1750,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
}
noway_assert(op2->isContained());
+ noway_assert(op2->IsCnsIntOrI());
unsigned int index = (unsigned int)op2->gtIntCon.gtIconVal;
unsigned int byteShiftCnt = index * genTypeSize(baseType);
@@ -1809,7 +1895,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
assert(tmpReg != REG_NA);
ins = ins_CopyFloatToInt(TYP_FLOAT, baseType);
- // (Note that for mov_xmm2i, the int register is always in the reg2 position.
+ // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
inst_RV_RV(ins, tmpReg, targetReg, baseType);
}
}
@@ -2036,7 +2122,7 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
}
//-----------------------------------------------------------------------------
-// genStoreLclFldTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
+// genStoreLclTypeSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
// Since Vector3 is not a hardware supported write size, it is performed
// as two stores: 8 byte followed by 4-byte.
//
@@ -2046,14 +2132,19 @@ void CodeGen::genLoadIndTypeSIMD12(GenTree* treeNode)
// Return Value:
// None.
//
-void CodeGen::genStoreLclFldTypeSIMD12(GenTree* treeNode)
+void CodeGen::genStoreLclTypeSIMD12(GenTree* treeNode)
{
- assert(treeNode->OperGet() == GT_STORE_LCL_FLD);
+ assert((treeNode->OperGet() == GT_STORE_LCL_FLD) || (treeNode->OperGet() == GT_STORE_LCL_VAR));
- unsigned offs = treeNode->gtLclFld.gtLclOffs;
+ unsigned offs = 0;
unsigned varNum = treeNode->gtLclVarCommon.gtLclNum;
assert(varNum < compiler->lvaCount);
+    if (treeNode->OperGet() == GT_STORE_LCL_FLD)
+ {
+ offs = treeNode->gtLclFld.gtLclOffs;
+ }
+
GenTreePtr op1 = treeNode->gtOp.gtOp1;
assert(!op1->isContained());
regNumber operandReg = genConsumeReg(op1);
@@ -2121,9 +2212,38 @@ void CodeGen::genLoadLclTypeSIMD12(GenTree* treeNode)
#ifdef _TARGET_X86_
//-----------------------------------------------------------------------------
+// genStoreSIMD12ToStack: store a TYP_SIMD12 (i.e. Vector3) value to the stack.
+// Since Vector3 is not a hardware supported write size, it is performed
+// as two stores: 8 byte followed by 4-byte. The stack is assumed to have
+// already been adjusted.
+//
+// Arguments:
+// operandReg - the xmm register containing the SIMD12 to store.
+// tmpReg - an xmm register that can be used as a temporary for the operation.
+//
+// Return Value:
+// None.
+//
+void CodeGen::genStoreSIMD12ToStack(regNumber operandReg, regNumber tmpReg)
+{
+ assert(genIsValidFloatReg(operandReg));
+ assert(genIsValidFloatReg(tmpReg));
+
+ // 8-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, REG_SPBASE, 0);
+
+ // Extract upper 4-bytes from data
+ getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
+
+ // 4-byte write
+ getEmitter()->emitIns_AR_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, REG_SPBASE, 8);
+}
+
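A standalone sketch of the store sequence genStoreSIMD12ToStack emits (again only an illustration under the assumption of SSE2 intrinsics, not the emitter code itself):

#include <emmintrin.h>

// Write the low 12 bytes (a Vector3) of an XMM value to dst: an 8-byte store of
// elements 0..1, then pshufd to move element 2 into lane 0 and a 4-byte store.
static void store_simd12(float* dst, __m128 v)
{
    _mm_store_sd((double*)dst, _mm_castps_pd(v));                  // movsd [dst], xmm
    __m128i upper = _mm_shuffle_epi32(_mm_castps_si128(v), 0x02);  // pshufd tmp, xmm, 0x02
    _mm_store_ss(dst + 2, _mm_castsi128_ps(upper));                // movss [dst+8], tmp
}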
+//-----------------------------------------------------------------------------
// genPutArgStkSIMD12: store a TYP_SIMD12 (i.e. Vector3) type field.
// Since Vector3 is not a hardware supported write size, it is performed
-// as two stores: 8 byte followed by 4-byte.
+// as two stores: 8 byte followed by 4-byte. The stack is assumed to have
+// already been adjusted.
//
// Arguments:
// treeNode - tree node that is attempting to store TYP_SIMD12 field
@@ -2144,19 +2264,7 @@ void CodeGen::genPutArgStkSIMD12(GenTree* treeNode)
assert(genCountBits(treeNode->gtRsvdRegs) == 1);
regNumber tmpReg = genRegNumFromMask(treeNode->gtRsvdRegs);
- // Subtract from ESP; create space for argument.
- // TODO-CQ: use 'push' instead?
- inst_RV_IV(INS_sub, REG_SPBASE, 12, EA_PTRSIZE);
- genStackLevel += 12;
-
- // 8-byte write
- getEmitter()->emitIns_AR_R(ins_Store(TYP_DOUBLE), EA_8BYTE, operandReg, REG_SPBASE, 0);
-
- // Extract upper 4-bytes from data
- getEmitter()->emitIns_R_R_I(INS_pshufd, emitActualTypeSize(TYP_SIMD16), tmpReg, operandReg, 0x02);
-
- // 4-byte write
- getEmitter()->emitIns_AR_R(ins_Store(TYP_FLOAT), EA_4BYTE, tmpReg, REG_SPBASE, 8);
+ genStoreSIMD12ToStack(operandReg, tmpReg);
}
#endif // _TARGET_X86_
@@ -2274,6 +2382,7 @@ void CodeGen::genSIMDIntrinsic(GenTreeSIMD* simdNode)
case SIMDIntrinsicSqrt:
case SIMDIntrinsicCast:
+ case SIMDIntrinsicAbs:
genSIMDIntrinsicUnOp(simdNode);
break;
diff --git a/src/jit/simdintrinsiclist.h b/src/jit/simdintrinsiclist.h
index c81f7b4bf0..0160582892 100644
--- a/src/jit/simdintrinsiclist.h
+++ b/src/jit/simdintrinsiclist.h
@@ -89,13 +89,12 @@ SIMD_INTRINSIC("op_Subtraction", false, Sub,
SIMD_INTRINSIC("op_Multiply", false, Mul, "*", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_SHORT,TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
SIMD_INTRINSIC("op_Division", false, Div, "/", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
-// Abs and SquareRoot are recognized as intrinsics only in case of float or double vectors
-SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_CHAR, TYP_UBYTE, TYP_UINT, TYP_ULONG, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
+// SquareRoot is recognized as an intrinsic only for float or double vectors
SIMD_INTRINSIC("SquareRoot", false, Sqrt, "sqrt", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF}, {TYP_FLOAT, TYP_DOUBLE, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF, TYP_UNDEF})
-// Min and max methods are recognized as intrinsics only in case of float or double vectors
SIMD_INTRINSIC("Min", false, Min, "min", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
SIMD_INTRINSIC("Max", false, Max, "max", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
+SIMD_INTRINSIC("Abs", false, Abs, "abs", TYP_STRUCT, 1, {TYP_STRUCT, TYP_UNDEF, TYP_UNDEF }, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
// Vector Relational operators
SIMD_INTRINSIC("Equals", false, Equal, "eq", TYP_STRUCT, 2, {TYP_STRUCT, TYP_STRUCT, TYP_UNDEF}, {TYP_INT, TYP_FLOAT, TYP_DOUBLE, TYP_LONG, TYP_CHAR, TYP_UBYTE, TYP_BYTE, TYP_SHORT, TYP_UINT, TYP_ULONG})
diff --git a/src/jit/sm.cpp b/src/jit/sm.cpp
index 859b238ec8..b016899761 100644
--- a/src/jit/sm.cpp
+++ b/src/jit/sm.cpp
@@ -51,11 +51,6 @@ void CodeSeqSM::Start(Compiler* comp)
void CodeSeqSM::Reset()
{
curState = SM_STATE_ID_START;
-
-#ifdef DEBUG
- // Reset the state occurence counts
- memset(StateMatchedCounts, 0, sizeof(StateMatchedCounts));
-#endif
}
void CodeSeqSM::End()
diff --git a/src/jit/sm.h b/src/jit/sm.h
index 33d65092bb..8c90e0b7f9 100644
--- a/src/jit/sm.h
+++ b/src/jit/sm.h
@@ -42,9 +42,7 @@ public:
inline void TermStateMatch(SM_STATE_ID stateID DEBUGARG(bool verbose))
{
assert(States[stateID].term);
- assert(StateMatchedCounts[stateID] < _UI16_MAX);
#ifdef DEBUG
- ++StateMatchedCounts[stateID];
#ifndef SMGEN_COMPILE
if (verbose)
{
@@ -65,7 +63,6 @@ public:
}
#ifdef DEBUG
- WORD StateMatchedCounts[NUM_SM_STATES];
const char* StateDesc(SM_STATE_ID stateID);
#endif
diff --git a/src/jit/ssabuilder.cpp b/src/jit/ssabuilder.cpp
index f0ee461c45..3d74234b26 100644
--- a/src/jit/ssabuilder.cpp
+++ b/src/jit/ssabuilder.cpp
@@ -103,12 +103,19 @@ void Compiler::fgResetForSsa()
{
lvaTable[i].lvPerSsaData.Reset();
}
- lvHeapPerSsaData.Reset();
- m_heapSsaMap = nullptr;
+ lvMemoryPerSsaData.Reset();
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ m_memorySsaMap[memoryKind] = nullptr;
+ }
+
for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
{
// Eliminate phis.
- blk->bbHeapSsaPhiFunc = nullptr;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ blk->bbMemorySsaPhiFunc[memoryKind] = nullptr;
+ }
if (blk->bbTreeList != nullptr)
{
GenTreePtr last = blk->bbTreeList->gtPrev;
@@ -804,29 +811,48 @@ void SsaBuilder::InsertPhiFunctions(BasicBlock** postOrder, int count)
}
}
- // Now make a similar phi definition if the block defines Heap.
- if (block->bbHeapDef)
+ // Now make a similar phi definition if the block defines memory.
+ if (block->bbMemoryDef != 0)
{
// For each block "bbInDomFront" that is in the dominance frontier of "block".
for (BlkSet::KeyIterator iterBlk = blkIdf->Begin(); !iterBlk.Equal(blkIdf->End()); ++iterBlk)
{
BasicBlock* bbInDomFront = iterBlk.Get();
- DBG_SSA_JITDUMP(" Considering BB%02u in dom frontier of BB%02u for Heap phis:\n",
+ DBG_SSA_JITDUMP(" Considering BB%02u in dom frontier of BB%02u for Memory phis:\n",
bbInDomFront->bbNum, block->bbNum);
- // Check if Heap is live into block "*iterBlk".
- if (!bbInDomFront->bbHeapLiveIn)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- continue;
- }
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // Share the PhiFunc with ByrefExposed.
+ assert(memoryKind > ByrefExposed);
+ bbInDomFront->bbMemorySsaPhiFunc[memoryKind] = bbInDomFront->bbMemorySsaPhiFunc[ByrefExposed];
+ continue;
+ }
- // Check if we've already inserted a phi node.
- if (bbInDomFront->bbHeapSsaPhiFunc == nullptr)
- {
- // We have a variable i that is defined in block j and live at l, and l belongs to dom frontier of
- // j. So insert a phi node at l.
- JITDUMP("Inserting phi definition for Heap at start of BB%02u.\n", bbInDomFront->bbNum);
- bbInDomFront->bbHeapSsaPhiFunc = BasicBlock::EmptyHeapPhiDef;
+ // Check if this memoryKind is defined in this block.
+ if ((block->bbMemoryDef & memoryKindSet(memoryKind)) == 0)
+ {
+ continue;
+ }
+
+ // Check if memoryKind is live into block "*iterBlk".
+ if ((bbInDomFront->bbMemoryLiveIn & memoryKindSet(memoryKind)) == 0)
+ {
+ continue;
+ }
+
+ // Check if we've already inserted a phi node.
+ if (bbInDomFront->bbMemorySsaPhiFunc[memoryKind] == nullptr)
+ {
+                    // We have a variable i that is defined in block j and live at l, and l belongs to
+                    // dom frontier of j. So insert a phi node at l.
+ JITDUMP("Inserting phi definition for %s at start of BB%02u.\n", memoryKindNames[memoryKind],
+ bbInDomFront->bbNum);
+ bbInDomFront->bbMemorySsaPhiFunc[memoryKind] = BasicBlock::EmptyMemoryPhiDef;
+ }
}
}
}
@@ -944,31 +970,63 @@ void SsaBuilder::TreeRenameVariables(GenTree* tree, BasicBlock* block, SsaRename
}
}
- // Figure out if "tree" may make a new heap state (if we care for this block).
- if (!block->bbHeapHavoc)
+ // Figure out if "tree" may make a new GC heap state (if we care for this block).
+ if ((block->bbMemoryHavoc & memoryKindSet(GcHeap)) == 0)
{
if (tree->OperIsAssignment() || tree->OperIsBlkOp())
{
if (m_pCompiler->ehBlockHasExnFlowDsc(block))
{
GenTreeLclVarCommon* lclVarNode;
- if (!tree->DefinesLocal(m_pCompiler, &lclVarNode))
+
+ bool isLocal = tree->DefinesLocal(m_pCompiler, &lclVarNode);
+ bool isAddrExposedLocal = isLocal && m_pCompiler->lvaVarAddrExposed(lclVarNode->gtLclNum);
+ bool hasByrefHavoc = ((block->bbMemoryHavoc & memoryKindSet(ByrefExposed)) != 0);
+ if (!isLocal || (isAddrExposedLocal && !hasByrefHavoc))
{
- // It *may* define the heap in a non-havoc way. Make a new SSA # -- associate with this node.
- unsigned count = pRenameState->CountForHeapDef();
- pRenameState->PushHeap(block, count);
- m_pCompiler->GetHeapSsaMap()->Set(tree, count);
-#ifdef DEBUG
- if (JitTls::GetCompiler()->verboseSsa)
+ // It *may* define byref memory in a non-havoc way. Make a new SSA # -- associate with this node.
+ unsigned count = pRenameState->CountForMemoryDef();
+ if (!hasByrefHavoc)
{
- printf("Node ");
- Compiler::printTreeID(tree);
- printf(" (in try block) may define heap; ssa # = %d.\n", count);
- }
+ pRenameState->PushMemory(ByrefExposed, block, count);
+ m_pCompiler->GetMemorySsaMap(ByrefExposed)->Set(tree, count);
+#ifdef DEBUG
+ if (JitTls::GetCompiler()->verboseSsa)
+ {
+ printf("Node ");
+ Compiler::printTreeID(tree);
+ printf(" (in try block) may define memory; ssa # = %d.\n", count);
+ }
#endif // DEBUG
- // Now add this SSA # to all phis of the reachable catch blocks.
- AddHeapDefToHandlerPhis(block, count);
+ // Now add this SSA # to all phis of the reachable catch blocks.
+ AddMemoryDefToHandlerPhis(ByrefExposed, block, count);
+ }
+
+ if (!isLocal)
+ {
+ // Add a new def for GcHeap as well
+ if (m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // GcHeap and ByrefExposed share the same stacks, SsaMap, and phis
+ assert(!hasByrefHavoc);
+ assert(pRenameState->CountForMemoryUse(GcHeap) == count);
+ assert(*m_pCompiler->GetMemorySsaMap(GcHeap)->LookupPointer(tree) == count);
+ assert(block->bbMemorySsaPhiFunc[GcHeap] == block->bbMemorySsaPhiFunc[ByrefExposed]);
+ }
+ else
+ {
+ if (!hasByrefHavoc)
+ {
+ // Allocate a distinct defnum for the GC Heap
+ count = pRenameState->CountForMemoryDef();
+ }
+
+ pRenameState->PushMemory(GcHeap, block, count);
+ m_pCompiler->GetMemorySsaMap(GcHeap)->Set(tree, count);
+ AddMemoryDefToHandlerPhis(GcHeap, block, count);
+ }
+ }
}
}
}
@@ -1154,7 +1212,7 @@ void SsaBuilder::AddDefToHandlerPhis(BasicBlock* block, unsigned lclNum, unsigne
}
}
-void SsaBuilder::AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count)
+void SsaBuilder::AddMemoryDefToHandlerPhis(MemoryKind memoryKind, BasicBlock* block, unsigned count)
{
if (m_pCompiler->ehBlockHasExnFlowDsc(block))
{
@@ -1165,39 +1223,60 @@ void SsaBuilder::AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count)
}
// Otherwise...
- DBG_SSA_JITDUMP("Definition of Heap/d:%d in block BB%02u has exn handler; adding as phi arg to handlers.\n",
- count, block->bbNum);
+ DBG_SSA_JITDUMP("Definition of %s/d:%d in block BB%02u has exn handler; adding as phi arg to handlers.\n",
+ memoryKindNames[memoryKind], count, block->bbNum);
EHblkDsc* tryBlk = m_pCompiler->ehGetBlockExnFlowDsc(block);
while (true)
{
BasicBlock* handler = tryBlk->ExFlowBlock();
- // Is Heap live on entry to the handler?
- if (handler->bbHeapLiveIn)
+ // Is memoryKind live on entry to the handler?
+ if ((handler->bbMemoryLiveIn & memoryKindSet(memoryKind)) != 0)
{
- assert(handler->bbHeapSsaPhiFunc != nullptr);
+            assert(handler->bbMemorySsaPhiFunc[memoryKind] != nullptr);
+
+ // Add "count" to the phi args of memoryKind.
+ BasicBlock::MemoryPhiArg*& handlerMemoryPhi = handler->bbMemorySsaPhiFunc[memoryKind];
+
+#if DEBUG
+ if (m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // When sharing phis for GcHeap and ByrefExposed, callers should ask to add phis
+ // for ByrefExposed only.
+ assert(memoryKind != GcHeap);
+ if (memoryKind == ByrefExposed)
+ {
+ // The GcHeap and ByrefExposed phi funcs should always be in sync.
+ assert(handlerMemoryPhi == handler->bbMemorySsaPhiFunc[GcHeap]);
+ }
+ }
+#endif
- // Add "count" to the phi args of Heap.
- if (handler->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
+ if (handlerMemoryPhi == BasicBlock::EmptyMemoryPhiDef)
{
- handler->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(count);
+ handlerMemoryPhi = new (m_pCompiler) BasicBlock::MemoryPhiArg(count);
}
else
{
#ifdef DEBUG
- BasicBlock::HeapPhiArg* curArg = handler->bbHeapSsaPhiFunc;
+ BasicBlock::MemoryPhiArg* curArg = handler->bbMemorySsaPhiFunc[memoryKind];
while (curArg != nullptr)
{
assert(curArg->GetSsaNum() != count);
curArg = curArg->m_nextArg;
}
#endif // DEBUG
- handler->bbHeapSsaPhiFunc =
- new (m_pCompiler) BasicBlock::HeapPhiArg(count, handler->bbHeapSsaPhiFunc);
+ handlerMemoryPhi = new (m_pCompiler) BasicBlock::MemoryPhiArg(count, handlerMemoryPhi);
}
- DBG_SSA_JITDUMP(" Added phi arg u:%d for Heap to phi defn in handler block BB%02u.\n", count,
- handler->bbNum);
+ DBG_SSA_JITDUMP(" Added phi arg u:%d for %s to phi defn in handler block BB%02u.\n", count,
+                            memoryKindNames[memoryKind], handler->bbNum);
+
+ if ((memoryKind == ByrefExposed) && m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // Share the phi between GcHeap and ByrefExposed.
+ handler->bbMemorySsaPhiFunc[GcHeap] = handlerMemoryPhi;
+ }
}
unsigned tryInd = tryBlk->ebdEnclosingTryIndex;
if (tryInd == EHblkDsc::NO_ENCLOSING_INDEX)
@@ -1221,19 +1300,33 @@ void SsaBuilder::BlockRenameVariables(BasicBlock* block, SsaRenameState* pRename
{
// Walk the statements of the block and rename the tree variables.
- // First handle the incoming Heap state.
-
- // Is there an Phi definition for heap at the start of this block?
- if (block->bbHeapSsaPhiFunc != nullptr)
+ // First handle the incoming memory states.
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- unsigned count = pRenameState->CountForHeapDef();
- pRenameState->PushHeap(block, count);
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // ByrefExposed and GcHeap share any phi this block may have,
+ assert(block->bbMemorySsaPhiFunc[memoryKind] == block->bbMemorySsaPhiFunc[ByrefExposed]);
+ // so we will have already allocated a defnum for it if needed.
+ assert(memoryKind > ByrefExposed);
+ assert(pRenameState->CountForMemoryUse(memoryKind) == pRenameState->CountForMemoryUse(ByrefExposed));
+ }
+ else
+ {
+            // Is there a Phi definition for memoryKind at the start of this block?
+ if (block->bbMemorySsaPhiFunc[memoryKind] != nullptr)
+ {
+ unsigned count = pRenameState->CountForMemoryDef();
+ pRenameState->PushMemory(memoryKind, block, count);
- DBG_SSA_JITDUMP("Ssa # for Heap phi on entry to BB%02u is %d.\n", block->bbNum, count);
- }
+ DBG_SSA_JITDUMP("Ssa # for %s phi on entry to BB%02u is %d.\n", memoryKindNames[memoryKind],
+ block->bbNum, count);
+ }
+ }
- // Record the "in" Ssa # for Heap.
- block->bbHeapSsaNumIn = pRenameState->CountForHeapUse();
+ // Record the "in" Ssa # for memoryKind.
+ block->bbMemorySsaNumIn[memoryKind] = pRenameState->CountForMemoryUse(memoryKind);
+ }
// We need to iterate over phi definitions, to give them SSA names, but we need
// to know which are which, so we don't add phi definitions to handler phi arg lists.
@@ -1253,22 +1346,38 @@ void SsaBuilder::BlockRenameVariables(BasicBlock* block, SsaRenameState* pRename
}
}
- // Now handle the final heap state.
-
- // If the block defines Heap, allocate an SSA variable for the final heap state in the block.
- // (This may be redundant with the last SSA var explicitly created, but there's no harm in that.)
- if (block->bbHeapDef)
+ // Now handle the final memory states.
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- unsigned count = pRenameState->CountForHeapDef();
- pRenameState->PushHeap(block, count);
- AddHeapDefToHandlerPhis(block, count);
- }
+ MemoryKindSet memorySet = memoryKindSet(memoryKind);
+
+ // If the block defines memory, allocate an SSA variable for the final memory state in the block.
+ // (This may be redundant with the last SSA var explicitly created, but there's no harm in that.)
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // We've already allocated the SSA num and propagated it to shared phis, if needed,
+ // when processing ByrefExposed.
+ assert(memoryKind > ByrefExposed);
+ assert(((block->bbMemoryDef & memorySet) != 0) ==
+ ((block->bbMemoryDef & memoryKindSet(ByrefExposed)) != 0));
+ assert(pRenameState->CountForMemoryUse(memoryKind) == pRenameState->CountForMemoryUse(ByrefExposed));
+ }
+ else
+ {
+ if ((block->bbMemoryDef & memorySet) != 0)
+ {
+ unsigned count = pRenameState->CountForMemoryDef();
+ pRenameState->PushMemory(memoryKind, block, count);
+ AddMemoryDefToHandlerPhis(memoryKind, block, count);
+ }
+ }
- // Record the "out" Ssa" # for Heap.
- block->bbHeapSsaNumOut = pRenameState->CountForHeapUse();
+ // Record the "out" Ssa" # for memoryKind.
+ block->bbMemorySsaNumOut[memoryKind] = pRenameState->CountForMemoryUse(memoryKind);
- DBG_SSA_JITDUMP("Ssa # for Heap on entry to BB%02u is %d; on exit is %d.\n", block->bbNum, block->bbHeapSsaNumIn,
- block->bbHeapSsaNumOut);
+ DBG_SSA_JITDUMP("Ssa # for %s on entry to BB%02u is %d; on exit is %d.\n", memoryKindNames[memoryKind],
+ block->bbNum, block->bbMemorySsaNumIn[memoryKind], block->bbMemorySsaNumOut[memoryKind]);
+ }
}
/**
@@ -1328,34 +1437,54 @@ void SsaBuilder::AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pR
m_pCompiler->fgSetStmtSeq(stmt);
}
- // Now handle Heap.
- if (succ->bbHeapSsaPhiFunc != nullptr)
+ // Now handle memory.
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- if (succ->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
- {
- succ->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block);
- }
- else
+ BasicBlock::MemoryPhiArg*& succMemoryPhi = succ->bbMemorySsaPhiFunc[memoryKind];
+ if (succMemoryPhi != nullptr)
{
- BasicBlock::HeapPhiArg* curArg = succ->bbHeapSsaPhiFunc;
- bool found = false;
- // This is a quadratic algorithm. We might need to consider some switch over to a hash table
- // representation for the arguments of a phi node, to make this linear.
- while (curArg != nullptr)
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
{
- if (curArg->m_predBB == block)
- {
- found = true;
- break;
- }
- curArg = curArg->m_nextArg;
+ // We've already propagated the "out" number to the phi shared with ByrefExposed,
+ // but still need to update bbMemorySsaPhiFunc to be in sync between GcHeap and ByrefExposed.
+ assert(memoryKind > ByrefExposed);
+ assert(block->bbMemorySsaNumOut[memoryKind] == block->bbMemorySsaNumOut[ByrefExposed]);
+ assert((succ->bbMemorySsaPhiFunc[ByrefExposed] == succMemoryPhi) ||
+ (succ->bbMemorySsaPhiFunc[ByrefExposed]->m_nextArg ==
+ (succMemoryPhi == BasicBlock::EmptyMemoryPhiDef ? nullptr : succMemoryPhi)));
+ succMemoryPhi = succ->bbMemorySsaPhiFunc[ByrefExposed];
+
+ continue;
+ }
+
+ if (succMemoryPhi == BasicBlock::EmptyMemoryPhiDef)
+ {
+ succMemoryPhi = new (m_pCompiler) BasicBlock::MemoryPhiArg(block->bbMemorySsaNumOut[memoryKind]);
}
- if (!found)
+ else
{
- succ->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block, succ->bbHeapSsaPhiFunc);
+ BasicBlock::MemoryPhiArg* curArg = succMemoryPhi;
+ unsigned ssaNum = block->bbMemorySsaNumOut[memoryKind];
+ bool found = false;
+ // This is a quadratic algorithm. We might need to consider some switch over to a hash table
+ // representation for the arguments of a phi node, to make this linear.
+ while (curArg != nullptr)
+ {
+ if (curArg->m_ssaNum == ssaNum)
+ {
+ found = true;
+ break;
+ }
+ curArg = curArg->m_nextArg;
+ }
+ if (!found)
+ {
+ succMemoryPhi = new (m_pCompiler) BasicBlock::MemoryPhiArg(ssaNum, succMemoryPhi);
+ }
}
+ DBG_SSA_JITDUMP(" Added phi arg for %s u:%d from BB%02u in BB%02u.\n", memoryKindNames[memoryKind],
+ block->bbMemorySsaNumOut[memoryKind], block->bbNum, succ->bbNum);
}
- DBG_SSA_JITDUMP(" Added phi arg for Heap from BB%02u in BB%02u.\n", block->bbNum, succ->bbNum);
}
// If "succ" is the first block of a try block (and "block" is not also in that try block)
@@ -1461,28 +1590,44 @@ void SsaBuilder::AssignPhiNodeRhsVariables(BasicBlock* block, SsaRenameState* pR
}
}
- // Now handle Heap.
- if (handlerStart->bbHeapSsaPhiFunc != nullptr)
+ // Now handle memory.
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- if (handlerStart->bbHeapSsaPhiFunc == BasicBlock::EmptyHeapPhiDef)
- {
- handlerStart->bbHeapSsaPhiFunc = new (m_pCompiler) BasicBlock::HeapPhiArg(block);
- }
- else
+ BasicBlock::MemoryPhiArg*& handlerMemoryPhi = handlerStart->bbMemorySsaPhiFunc[memoryKind];
+ if (handlerMemoryPhi != nullptr)
{
-#ifdef DEBUG
- BasicBlock::HeapPhiArg* curArg = handlerStart->bbHeapSsaPhiFunc;
- while (curArg != nullptr)
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
{
- assert(curArg->m_predBB != block);
- curArg = curArg->m_nextArg;
+ // We've already added the arg to the phi shared with ByrefExposed if needed,
+ // but still need to update bbMemorySsaPhiFunc to stay in sync.
+ assert(memoryKind > ByrefExposed);
+ assert(block->bbMemorySsaNumOut[memoryKind] == block->bbMemorySsaNumOut[ByrefExposed]);
+ assert(handlerStart->bbMemorySsaPhiFunc[ByrefExposed]->m_ssaNum ==
+ block->bbMemorySsaNumOut[memoryKind]);
+ handlerMemoryPhi = handlerStart->bbMemorySsaPhiFunc[ByrefExposed];
+
+ continue;
}
-#endif // DEBUG
- handlerStart->bbHeapSsaPhiFunc =
- new (m_pCompiler) BasicBlock::HeapPhiArg(block, handlerStart->bbHeapSsaPhiFunc);
+
+ if (handlerMemoryPhi == BasicBlock::EmptyMemoryPhiDef)
+ {
+ handlerMemoryPhi =
+ new (m_pCompiler) BasicBlock::MemoryPhiArg(block->bbMemorySsaNumOut[memoryKind]);
+ }
+ else
+ {
+ // This path has a potential to introduce redundant phi args, due to multiple
+ // preds of the same try-begin block having the same live-out memory def, and/or
+ // due to nested try-begins each having preds with the same live-out memory def.
+ // Avoid doing quadratic processing on handler phis, and instead live with the
+ // occasional redundancy.
+ handlerMemoryPhi = new (m_pCompiler)
+ BasicBlock::MemoryPhiArg(block->bbMemorySsaNumOut[memoryKind], handlerMemoryPhi);
+ }
+ DBG_SSA_JITDUMP(" Added phi arg for %s u:%d from BB%02u in BB%02u.\n",
+ memoryKindNames[memoryKind], block->bbMemorySsaNumOut[memoryKind], block->bbNum,
+ handlerStart->bbNum);
}
- DBG_SSA_JITDUMP(" Added phi arg for Heap from BB%02u in BB%02u.\n", block->bbNum,
- handlerStart->bbNum);
}
tryInd = succTry->ebdEnclosingTryIndex;
@@ -1503,8 +1648,17 @@ void SsaBuilder::BlockPopStacks(BasicBlock* block, SsaRenameState* pRenameState)
// Pop the names given to the non-phi nodes.
pRenameState->PopBlockStacks(block);
- // And for Heap.
- pRenameState->PopBlockHeapStack(block);
+ // And for memory.
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // GcHeap and ByrefExposed share a rename stack, so don't try
+ // to pop it a second time.
+ continue;
+ }
+ pRenameState->PopBlockMemoryStack(memoryKind, block);
+ }
}
/**
@@ -1553,20 +1707,32 @@ void SsaBuilder::RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenam
pRenameState->Push(nullptr, i, count);
}
}
- // In ValueNum we'd assume un-inited heap gets FIRST_SSA_NUM.
- // The heap is a parameter. Use FIRST_SSA_NUM as first SSA name.
- unsigned initHeapCount = pRenameState->CountForHeapDef();
- assert(initHeapCount == SsaConfig::FIRST_SSA_NUM);
- pRenameState->PushHeap(m_pCompiler->fgFirstBB, initHeapCount);
-
- // Initialize the heap ssa numbers for unreachable blocks. ValueNum expects
- // heap ssa numbers to have some intitial value.
+
+ // In ValueNum we'd assume un-inited memory gets FIRST_SSA_NUM.
+ // The memory is a parameter. Use FIRST_SSA_NUM as first SSA name.
+ unsigned initMemoryCount = pRenameState->CountForMemoryDef();
+ assert(initMemoryCount == SsaConfig::FIRST_SSA_NUM);
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ if ((memoryKind == GcHeap) && m_pCompiler->byrefStatesMatchGcHeapStates)
+ {
+ // GcHeap shares its stack with ByrefExposed; don't re-push.
+ continue;
+ }
+ pRenameState->PushMemory(memoryKind, m_pCompiler->fgFirstBB, initMemoryCount);
+ }
+
+ // Initialize the memory ssa numbers for unreachable blocks. ValueNum expects
+    // memory ssa numbers to have some initial value.
for (BasicBlock* block = m_pCompiler->fgFirstBB; block; block = block->bbNext)
{
if (block->bbIDom == nullptr)
{
- block->bbHeapSsaNumIn = initHeapCount;
- block->bbHeapSsaNumOut = initHeapCount;
+ for (MemoryKind memoryKind : allMemoryKinds())
+ {
+ block->bbMemorySsaNumIn[memoryKind] = initMemoryCount;
+ block->bbMemorySsaNumOut[memoryKind] = initMemoryCount;
+ }
}
}
@@ -1625,8 +1791,8 @@ void SsaBuilder::RenameVariables(BlkToBlkSetMap* domTree, SsaRenameState* pRenam
}
}
- // Remember the number of Heap SSA names.
- m_pCompiler->lvHeapNumSsaNames = pRenameState->HeapCount();
+ // Remember the number of memory SSA names.
+ m_pCompiler->lvMemoryNumSsaNames = pRenameState->MemoryCount();
}
#ifdef DEBUG
@@ -1733,7 +1899,7 @@ void SsaBuilder::Build()
// Rename local variables and collect UD information for each ssa var.
SsaRenameState* pRenameState = new (jitstd::utility::allocate<SsaRenameState>(m_allocator), jitstd::placement_t())
- SsaRenameState(m_allocator, m_pCompiler->lvaCount);
+ SsaRenameState(m_allocator, m_pCompiler->lvaCount, m_pCompiler->byrefStatesMatchGcHeapStates);
RenameVariables(domTree, pRenameState);
EndPhase(PHASE_BUILD_SSA_RENAME);
diff --git a/src/jit/ssabuilder.h b/src/jit/ssabuilder.h
index 2fff06573e..e82a4007e3 100644
--- a/src/jit/ssabuilder.h
+++ b/src/jit/ssabuilder.h
@@ -164,8 +164,8 @@ private:
// block of those handlers.
void AddDefToHandlerPhis(BasicBlock* block, unsigned lclNum, unsigned count);
- // Same as above, for "Heap".
- void AddHeapDefToHandlerPhis(BasicBlock* block, unsigned count);
+ // Same as above, for memory.
+ void AddMemoryDefToHandlerPhis(MemoryKind memoryKind, BasicBlock* block, unsigned count);
// Requires "block" to be non-NULL. Requires "pRenameState" to be non-NULL and be currently used
// for variables renaming. Assigns the rhs arguments to the phi, i.e., block's phi node arguments.
diff --git a/src/jit/ssarenamestate.cpp b/src/jit/ssarenamestate.cpp
index a1e05f192f..4ccac05a48 100644
--- a/src/jit/ssarenamestate.cpp
+++ b/src/jit/ssarenamestate.cpp
@@ -28,14 +28,17 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*
* @params alloc The allocator class used to allocate jitstd data.
*/
-SsaRenameState::SsaRenameState(const jitstd::allocator<int>& alloc, unsigned lvaCount)
+SsaRenameState::SsaRenameState(const jitstd::allocator<int>& alloc,
+ unsigned lvaCount,
+ bool byrefStatesMatchGcHeapStates)
: counts(nullptr)
, stacks(nullptr)
, definedLocs(alloc)
- , heapStack(alloc)
- , heapCount(0)
+ , memoryStack(alloc)
+ , memoryCount(0)
, lvaCount(lvaCount)
, m_alloc(alloc)
+ , byrefStatesMatchGcHeapStates(byrefStatesMatchGcHeapStates)
{
}
@@ -200,11 +203,12 @@ void SsaRenameState::PopBlockStacks(BasicBlock* block)
#endif // DEBUG
}
-void SsaRenameState::PopBlockHeapStack(BasicBlock* block)
+void SsaRenameState::PopBlockMemoryStack(MemoryKind memoryKind, BasicBlock* block)
{
- while (heapStack.size() > 0 && heapStack.back().m_bb == block)
+ auto& stack = memoryStack[memoryKind];
+ while (stack.size() > 0 && stack.back().m_bb == block)
{
- heapStack.pop_back();
+ stack.pop_back();
}
}
diff --git a/src/jit/ssarenamestate.h b/src/jit/ssarenamestate.h
index 1db36c5b37..a8496b6386 100644
--- a/src/jit/ssarenamestate.h
+++ b/src/jit/ssarenamestate.h
@@ -23,6 +23,53 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
#include "jitstd.h"
+// Fixed-size array that can hold elements with no default constructor;
+// it will construct them all by forwarding whatever arguments are
+// supplied to its constructor.
+template <typename T, int N>
+class ConstructedArray
+{
+ union {
+ // Storage that gets used to hold the T objects.
+ unsigned char bytes[N * sizeof(T)];
+
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+ // With MSVC pre-VS2015, the code in the #else branch would hit error C2621,
+ // so in that case just count on pointer alignment being sufficient
+ // (currently T is only ever instantiated as jitstd::list<SsaRenameStateForBlock>)
+
+ // Unused (except to impart alignment requirement)
+ void* pointer;
+#else
+ // Unused (except to impart alignment requirement)
+ T alignedArray[N];
+#endif // defined(_MSC_VER) && (_MSC_VER < 1900)
+ };
+
+public:
+ T& operator[](size_t i)
+ {
+ return *(reinterpret_cast<T*>(bytes + i * sizeof(T)));
+ }
+
+ template <typename... Args>
+ ConstructedArray(Args&&... args)
+ {
+ for (int i = 0; i < N; ++i)
+ {
+ new (bytes + i * sizeof(T), jitstd::placement_t()) T(jitstd::forward<Args>(args)...);
+ }
+ }
+
+ ~ConstructedArray()
+ {
+ for (int i = 0; i < N; ++i)
+ {
+ operator[](i).~T();
+ }
+ }
+};
+
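The union in ConstructedArray exists only to impart T's alignment on the byte buffer for pre-VS2015 MSVC; on a conforming C++11 compiler the same pattern is usually written with alignas. A hedged standalone sketch of the idea (the class name here is illustrative, not part of the JIT):

#include <cstddef>
#include <new>

// Fixed-size array whose elements are all constructed from the same
// constructor arguments (no default constructor required).
template <typename T, int N>
class ArgConstructedArray
{
    alignas(T) unsigned char bytes[N * sizeof(T)]; // raw, correctly aligned storage

public:
    template <typename... Args>
    explicit ArgConstructedArray(const Args&... args)
    {
        for (int i = 0; i < N; ++i)
        {
            new (bytes + i * sizeof(T)) T(args...); // construct each element in place
        }
    }

    ~ArgConstructedArray()
    {
        for (int i = 0; i < N; ++i)
        {
            (*this)[i].~T(); // destroy in place; the storage itself is automatic
        }
    }

    T& operator[](size_t i)
    {
        return *reinterpret_cast<T*>(bytes + i * sizeof(T));
    }
};

Taking the arguments by const reference (rather than perfect-forwarding the same pack N times) keeps repeated construction safe if an argument were movable; the JIT's ConstructedArray is only ever constructed with the jitstd allocator (see memoryStack(alloc) in ssarenamestate.cpp), so either choice works there.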
struct SsaRenameStateForBlock
{
BasicBlock* m_bb;
@@ -54,7 +101,7 @@ struct SsaRenameState
typedef unsigned* Counts;
typedef jitstd::list<SsaRenameStateLocDef> DefStack;
- SsaRenameState(const jitstd::allocator<int>& allocator, unsigned lvaCount);
+ SsaRenameState(const jitstd::allocator<int>& allocator, unsigned lvaCount, bool byrefStatesMatchGcHeapStates);
void EnsureCounts();
void EnsureStacks();
@@ -74,32 +121,42 @@ struct SsaRenameState
// Pop all stacks that have an entry for "bb" on top.
void PopBlockStacks(BasicBlock* bb);
- // Similar functions for the special implicit "Heap" variable.
- unsigned CountForHeapDef()
+ // Similar functions for the special implicit memory variable.
+ unsigned CountForMemoryDef()
{
- if (heapCount == 0)
+ if (memoryCount == 0)
{
- heapCount = SsaConfig::FIRST_SSA_NUM;
+ memoryCount = SsaConfig::FIRST_SSA_NUM;
}
- unsigned res = heapCount;
- heapCount++;
+ unsigned res = memoryCount;
+ memoryCount++;
return res;
}
- unsigned CountForHeapUse()
+ unsigned CountForMemoryUse(MemoryKind memoryKind)
{
- return heapStack.back().m_count;
+ if ((memoryKind == GcHeap) && byrefStatesMatchGcHeapStates)
+ {
+ // Share rename stacks in this configuration.
+ memoryKind = ByrefExposed;
+ }
+ return memoryStack[memoryKind].back().m_count;
}
- void PushHeap(BasicBlock* bb, unsigned count)
+ void PushMemory(MemoryKind memoryKind, BasicBlock* bb, unsigned count)
{
- heapStack.push_back(SsaRenameStateForBlock(bb, count));
+ if ((memoryKind == GcHeap) && byrefStatesMatchGcHeapStates)
+ {
+ // Share rename stacks in this configuration.
+ memoryKind = ByrefExposed;
+ }
+ memoryStack[memoryKind].push_back(SsaRenameStateForBlock(bb, count));
}
- void PopBlockHeapStack(BasicBlock* bb);
+ void PopBlockMemoryStack(MemoryKind memoryKind, BasicBlock* bb);
- unsigned HeapCount()
+ unsigned MemoryCount()
{
- return heapCount;
+ return memoryCount;
}
#ifdef DEBUG
@@ -117,13 +174,16 @@ private:
// This list represents the set of locals defined in the current block.
DefStack definedLocs;
- // Same state for the special implicit Heap variable.
- Stack heapStack;
- unsigned heapCount;
+ // Same state for the special implicit memory variables.
+ ConstructedArray<Stack, MemoryKindCount> memoryStack;
+ unsigned memoryCount;
// Number of stacks/counts to allocate.
unsigned lvaCount;
// Allocator to allocate stacks.
jitstd::allocator<void> m_alloc;
+
+ // Indicates whether GcHeap and ByrefExposed use the same state.
+ bool byrefStatesMatchGcHeapStates;
};
diff --git a/src/jit/stackfp.cpp b/src/jit/stackfp.cpp
index 43c463039e..3e0eceabb7 100644
--- a/src/jit/stackfp.cpp
+++ b/src/jit/stackfp.cpp
@@ -1376,7 +1376,7 @@ void CodeGen::genCodeForTreeStackFP_Asg(GenTreePtr tree)
emitAttr size;
unsigned offs;
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
assert(tree->OperGet() == GT_ASG);
@@ -1693,14 +1693,14 @@ void CodeGen::genCodeForTreeStackFP_Arithm(GenTreePtr tree)
if (tree->gtFlags & GTF_REVERSE_OPS)
{
bReverse = true;
- op1 = tree->gtGetOp2();
+ op1 = tree->gtGetOp2IfPresent();
op2 = tree->gtOp.gtOp1;
}
else
{
bReverse = false;
op1 = tree->gtOp.gtOp1;
- op2 = tree->gtGetOp2();
+ op2 = tree->gtGetOp2IfPresent();
}
regNumber result;
@@ -1928,7 +1928,7 @@ void CodeGen::genCodeForTreeStackFP_AsgArithm(GenTreePtr tree)
GenTreePtr op1, op2;
op1 = tree->gtOp.gtOp1;
- op2 = tree->gtGetOp2();
+ op2 = tree->gtGetOp2IfPresent();
genSetupForOpStackFP(op1, op2, (tree->gtFlags & GTF_REVERSE_OPS) ? true : false, true, false, true);
@@ -2208,7 +2208,7 @@ void CodeGen::genCodeForTreeStackFP_SmpOp(GenTreePtr tree)
case GT_COMMA:
{
GenTreePtr op1 = tree->gtOp.gtOp1;
- GenTreePtr op2 = tree->gtGetOp2();
+ GenTreePtr op2 = tree->gtGetOp2IfPresent();
if (tree->gtFlags & GTF_REVERSE_OPS)
{
diff --git a/src/jit/target.h b/src/jit/target.h
index a726525488..5b608ddfac 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -13,6 +13,13 @@
#endif
#endif
+// If the UNIX_X86_ABI is defined make sure that _TARGET_X86_ is also defined.
+#if defined(UNIX_X86_ABI)
+#if !defined(_TARGET_X86_)
+#error When UNIX_X86_ABI is defined, _TARGET_X86_ must be defined as well.
+#endif
+#endif
+
#if (defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
#define FEATURE_VARARG 0
#else // !(defined(FEATURE_CORECLR) && defined(PLATFORM_UNIX))
@@ -402,7 +409,11 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
// target
#define FEATURE_EH 1 // To aid platform bring-up, eliminate exceptional EH clauses (catch, filter,
// filter-handler, fault) and directly execute 'finally' clauses.
+#if defined(FEATURE_PAL) && !defined(LEGACY_BACKEND)
+ #define FEATURE_EH_FUNCLETS 1
+#else // FEATURE_PAL && !LEGACY_BACKEND
#define FEATURE_EH_FUNCLETS 0
+#endif // FEATURE_PAL && !LEGACY_BACKEND
#define FEATURE_EH_CALLFINALLY_THUNKS 0 // Generate call-to-finally code in "thunks" in the enclosing EH region,
// protected by "cloned finally" clauses.
#ifndef LEGACY_BACKEND
@@ -484,9 +495,15 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define MIN_ARG_AREA_FOR_CALL 0 // Minimum required outgoing argument space for a call.
#define CODE_ALIGN 1 // code alignment requirement
+#if !defined(UNIX_X86_ABI)
#define STACK_ALIGN 4 // stack alignment requirement
#define STACK_ALIGN_SHIFT 2 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
#define STACK_ALIGN_SHIFT_ALL 2 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+#else
+ #define STACK_ALIGN 16 // stack alignment requirement
+ #define STACK_ALIGN_SHIFT 4 // Shift-right amount to convert stack size in bytes to size in DWORD_PTRs
+ #define STACK_ALIGN_SHIFT_ALL 4 // Shift-right amount to convert stack size in bytes to size in STACK_ALIGN units
+#endif // !UNIX_X86_ABI
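As a quick check on the UNIX_X86_ABI values above (illustration only; the macro names are the ones defined here, the helper function is hypothetical):

#define STACK_ALIGN           16
#define STACK_ALIGN_SHIFT_ALL 4

// Round a frame size up to the stack alignment and express it in STACK_ALIGN units.
static unsigned alignedStackSlots(unsigned frameSizeInBytes)
{
    unsigned aligned = (frameSizeInBytes + (STACK_ALIGN - 1)) & ~(STACK_ALIGN - 1u);
    return aligned >> STACK_ALIGN_SHIFT_ALL;
}
// e.g. alignedStackSlots(20) == 2: 20 bytes rounds up to 32, i.e. two 16-byte units.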
#define RBM_INT_CALLEE_SAVED (RBM_EBX|RBM_ESI|RBM_EDI)
#define RBM_INT_CALLEE_TRASH (RBM_EAX|RBM_ECX|RBM_EDX)
@@ -1226,6 +1243,7 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define RBM_CALLEE_SAVED (RBM_INT_CALLEE_SAVED | RBM_FLT_CALLEE_SAVED)
#define RBM_CALLEE_TRASH (RBM_INT_CALLEE_TRASH | RBM_FLT_CALLEE_TRASH)
#define RBM_CALLEE_TRASH_NOGC (RBM_R2|RBM_R3|RBM_LR)
+ #define REG_DEFAULT_HELPER_CALL_TARGET REG_R12
#define RBM_ALLINT (RBM_INT_CALLEE_SAVED | RBM_INT_CALLEE_TRASH)
#define RBM_ALLFLOAT (RBM_FLT_CALLEE_SAVED | RBM_FLT_CALLEE_TRASH)
@@ -1433,6 +1451,8 @@ typedef unsigned short regPairNoSmall; // arm: need 12 bits
#define REG_ARG_FIRST REG_R0
#define REG_ARG_LAST REG_R3
+ #define REG_ARG_FP_FIRST REG_F0
+ #define REG_ARG_FP_LAST REG_F7
#define INIT_ARG_STACK_SLOT 0 // No outgoing reserved stack slots
#define REG_ARG_0 REG_R0
diff --git a/src/jit/unwind.cpp b/src/jit/unwind.cpp
index 4568fed75a..b354504bb7 100644
--- a/src/jit/unwind.cpp
+++ b/src/jit/unwind.cpp
@@ -132,37 +132,7 @@ void Compiler::unwindGetFuncLocations(FuncInfoDsc* func,
#elif defined(_TARGET_X86_)
-// Stub routines that do nothing
-void Compiler::unwindBegProlog()
-{
-}
-void Compiler::unwindEndProlog()
-{
-}
-void Compiler::unwindBegEpilog()
-{
-}
-void Compiler::unwindEndEpilog()
-{
-}
-void Compiler::unwindReserve()
-{
-}
-void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
-{
-}
-void Compiler::unwindPush(regNumber reg)
-{
-}
-void Compiler::unwindAllocStack(unsigned size)
-{
-}
-void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
-{
-}
-void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
-{
-}
+// See unwindx86.cpp
#else // _TARGET_*
diff --git a/src/jit/unwindx86.cpp b/src/jit/unwindx86.cpp
new file mode 100644
index 0000000000..516155c6a2
--- /dev/null
+++ b/src/jit/unwindx86.cpp
@@ -0,0 +1,249 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XX XX
+XX UnwindInfo XX
+XX XX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+*/
+
+#include "jitpch.h"
+#ifdef _MSC_VER
+#pragma hdrstop
+#endif
+
+#ifndef _TARGET_X86_
+#error "This should be included only for x86"
+#endif // _TARGET_X86_
+
+void Compiler::unwindBegProlog()
+{
+}
+
+void Compiler::unwindEndProlog()
+{
+}
+
+void Compiler::unwindBegEpilog()
+{
+}
+
+void Compiler::unwindEndEpilog()
+{
+}
+
+void Compiler::unwindPush(regNumber reg)
+{
+}
+
+void Compiler::unwindAllocStack(unsigned size)
+{
+}
+
+void Compiler::unwindSetFrameReg(regNumber reg, unsigned offset)
+{
+}
+
+void Compiler::unwindSaveReg(regNumber reg, unsigned offset)
+{
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserve: Ask the VM to reserve space for the unwind information
+// for the function and all its funclets. Called once, just before asking the VM
+// for memory and emitting the generated code. Calls unwindReserveFunc() to handle
+// the main function and each of the funclets, in turn.
+//
+void Compiler::unwindReserve()
+{
+#if FEATURE_EH_FUNCLETS
+ assert(!compGeneratingProlog);
+ assert(!compGeneratingEpilog);
+
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindReserveFunc(funGetFunc(funcIdx));
+ }
+#endif
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmit: Report all the unwind information to the VM.
+//
+// Arguments:
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+//
+void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
+{
+#if FEATURE_EH_FUNCLETS
+ assert(!compGeneratingProlog);
+ assert(!compGeneratingEpilog);
+
+ assert(compFuncInfoCount > 0);
+ for (unsigned funcIdx = 0; funcIdx < compFuncInfoCount; funcIdx++)
+ {
+ unwindEmitFunc(funGetFunc(funcIdx), pHotCode, pColdCode);
+ }
+#endif // FEATURE_EH_FUNCLETS
+}
+
+#if FEATURE_EH_FUNCLETS
+//------------------------------------------------------------------------
+// Compiler::unwindReserveFunc: Reserve the unwind information from the VM for a
+// given main function or funclet.
+//
+// Arguments:
+// func - The main function or funclet to reserve unwind info for.
+//
+void Compiler::unwindReserveFunc(FuncInfoDsc* func)
+{
+ unwindReserveFuncHelper(func, true);
+
+ if (fgFirstColdBlock != nullptr)
+ {
+ unwindReserveFuncHelper(func, false);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindReserveFuncHelper: Reserve the unwind information from the VM for a
+// given main function or funclet, for either the hot or the cold section.
+//
+// Arguments:
+// func - The main function or funclet to reserve unwind info for.
+// isHotCode - 'true' to reserve the hot section, 'false' to reserve the cold section.
+//
+void Compiler::unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode)
+{
+ BOOL isFunclet = (func->funKind != FUNC_ROOT);
+ BOOL isColdCode = isHotCode ? FALSE : TRUE;
+
+ eeReserveUnwindInfo(isFunclet, isColdCode, sizeof(UNWIND_INFO));
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmitFunc: Report the unwind information to the VM for a
+// given main function or funclet. Reports the hot section, then the cold
+// section if necessary.
+//
+// Arguments:
+//    func      - The main function or funclet to report unwind info for.
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+//
+void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode)
+{
+ // Verify that the JIT enum is in sync with the JIT-EE interface enum
+ static_assert_no_msg(FUNC_ROOT == (FuncKind)CORJIT_FUNC_ROOT);
+ static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
+ static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);
+
+ unwindEmitFuncHelper(func, pHotCode, pColdCode, true);
+
+ if (pColdCode != nullptr)
+ {
+ unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
+ }
+}
+
+//------------------------------------------------------------------------
+// Compiler::unwindEmitFuncHelper: Report the unwind information to the VM for a
+// given main function or funclet, for either the hot or cold section.
+//
+// Arguments:
+//    func      - The main function or funclet to report unwind info for.
+// pHotCode - Pointer to the beginning of the memory with the function and funclet hot code.
+// pColdCode - Pointer to the beginning of the memory with the function and funclet cold code.
+// Ignored if 'isHotCode' is true.
+// isHotCode - 'true' to report the hot section, 'false' to report the cold section.
+//
+void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode)
+{
+ UNATIVE_OFFSET startOffset;
+ UNATIVE_OFFSET endOffset;
+
+ if (isHotCode)
+ {
+ emitLocation* startLoc;
+ emitLocation* endLoc;
+
+ unwindGetFuncLocations(func, true, &startLoc, &endLoc);
+
+ if (startLoc == nullptr)
+ {
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = startLoc->CodeOffset(genEmitter);
+ }
+
+ if (endLoc == nullptr)
+ {
+ endOffset = info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = endLoc->CodeOffset(genEmitter);
+ }
+ }
+ else
+ {
+ emitLocation* coldStartLoc;
+ emitLocation* coldEndLoc;
+
+ assert(fgFirstColdBlock != nullptr);
+ assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
+
+ unwindGetFuncLocations(func, false, &coldStartLoc, &coldEndLoc);
+
+ if (coldStartLoc == nullptr)
+ {
+ startOffset = 0;
+ }
+ else
+ {
+ startOffset = coldStartLoc->CodeOffset(genEmitter);
+ }
+
+ if (coldEndLoc == nullptr)
+ {
+ endOffset = info.compNativeCodeSize;
+ }
+ else
+ {
+ endOffset = coldEndLoc->CodeOffset(genEmitter);
+ }
+ }
+
+ // Adjust for cold or hot code:
+ // 1. The VM doesn't want the cold code pointer unless this is cold code.
+ // 2. The startOffset and endOffset need to be from the base of the hot section for hot code
+ // and from the base of the cold section for cold code
+
+ if (isHotCode)
+ {
+ assert(endOffset <= info.compTotalHotCodeSize);
+ pColdCode = nullptr;
+ }
+ else
+ {
+ assert(startOffset >= info.compTotalHotCodeSize);
+ startOffset -= info.compTotalHotCodeSize;
+ endOffset -= info.compTotalHotCodeSize;
+ }
+
+ UNWIND_INFO unwindInfo;
+
+ unwindInfo.FunctionLength = (ULONG)(endOffset - startOffset);
+
+ eeAllocUnwindInfo((BYTE*)pHotCode, (BYTE*)pColdCode, startOffset, endOffset, sizeof(UNWIND_INFO),
+ (BYTE*)&unwindInfo, (CorJitFuncKind)func->funKind);
+}
+#endif // FEATURE_EH_FUNCLETS
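
The hot/cold handling in unwindEmitFuncHelper above is easy to get wrong, so here is a minimal standalone C++ sketch of just the offset rebasing; the fragment layout and section size are invented for illustration and none of this is CoreCLR code.

    // Standalone sketch (not CoreCLR code; sizes are invented) of the offset
    // rebasing done in unwindEmitFuncHelper: hot fragments stay relative to the
    // hot section base, cold fragments are rebased to the cold section base.
    #include <cassert>
    #include <cstdio>

    struct Fragment
    {
        unsigned start;
        unsigned end;
        bool     isHot;
    };

    static void rebase(Fragment& f, unsigned totalHotCodeSize)
    {
        if (f.isHot)
        {
            assert(f.end <= totalHotCodeSize);
        }
        else
        {
            assert(f.start >= totalHotCodeSize);
            f.start -= totalHotCodeSize; // now relative to the cold section base
            f.end -= totalHotCodeSize;
        }
    }

    int main()
    {
        const unsigned hotSize = 0x200;        // assumed hot-section size
        Fragment hot  = {0x000, 0x180, true};
        Fragment cold = {0x200, 0x260, false}; // cold code laid out after the hot code
        rebase(hot, hotSize);
        rebase(cold, hotSize);
        printf("hot  [%#x, %#x)\ncold [%#x, %#x)\n", hot.start, hot.end, cold.start, cold.end);
        return 0;
    }
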
diff --git a/src/jit/valuenum.cpp b/src/jit/valuenum.cpp
index f7cc0c9a23..aba29c4411 100644
--- a/src/jit/valuenum.cpp
+++ b/src/jit/valuenum.cpp
@@ -1373,10 +1373,10 @@ TailCall:
goto TailCall;
}
}
- else if (funcApp.m_func == VNF_PhiDef || funcApp.m_func == VNF_PhiHeapDef)
+ else if (funcApp.m_func == VNF_PhiDef || funcApp.m_func == VNF_PhiMemoryDef)
{
- unsigned lclNum = BAD_VAR_NUM;
- bool isHeap = false;
+ unsigned lclNum = BAD_VAR_NUM;
+ bool isMemory = false;
VNFuncApp phiFuncApp;
bool defArgIsFunc = false;
if (funcApp.m_func == VNF_PhiDef)
@@ -1386,8 +1386,8 @@ TailCall:
}
else
{
- assert(funcApp.m_func == VNF_PhiHeapDef);
- isHeap = true;
+ assert(funcApp.m_func == VNF_PhiMemoryDef);
+ isMemory = true;
defArgIsFunc = GetVNFunc(funcApp.m_args[1], &phiFuncApp);
}
if (defArgIsFunc && phiFuncApp.m_func == VNF_Phi)
@@ -1401,9 +1401,9 @@ TailCall:
assert(IsVNConstant(phiFuncApp.m_args[0]));
unsigned phiArgSsaNum = ConstantValue<unsigned>(phiFuncApp.m_args[0]);
ValueNum phiArgVN;
- if (isHeap)
+ if (isMemory)
{
- phiArgVN = m_pComp->GetHeapPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ phiArgVN = m_pComp->GetMemoryPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
}
else
{
@@ -1430,9 +1430,9 @@ TailCall:
}
assert(IsVNConstant(cur));
phiArgSsaNum = ConstantValue<unsigned>(cur);
- if (isHeap)
+ if (isMemory)
{
- phiArgVN = m_pComp->GetHeapPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
+ phiArgVN = m_pComp->GetMemoryPerSsaData(phiArgSsaNum)->m_vnPair.Get(vnk);
}
else
{
@@ -2465,9 +2465,10 @@ ValueNum ValueNumStore::VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_type
//------------------------------------------------------------------------
// VNApplySelectorsAssign: Compute the value number corresponding to "map" but with
-// the element at "fieldSeq" updated to have type "elem"; this is the new heap
-// value for an assignment of value "elem" into the heap at location "fieldSeq"
-// that occurs in block "block" and has type "indType".
+// the element at "fieldSeq" updated to be the value "elem"; this is the new memory
+// value for an assignment of value "elem" into the memory at location "fieldSeq"
+// that occurs in block "block" and has type "indType" (so long as the selectors
+// into that memory occupy disjoint locations, which is true for GcHeap).
//
// Arguments:
// vnk - Identifies whether to recurse to Conservative or Liberal value numbers
@@ -2478,7 +2479,7 @@ ValueNum ValueNumStore::VNApplySelectorsAssignTypeCoerce(ValueNum elem, var_type
// block - Block where the assignment occurs
//
// Return Value:
-// The value number corresopnding to the heap after the assignment.
+// The value number corresponding to memory after the assignment.
ValueNum ValueNumStore::VNApplySelectorsAssign(
ValueNumKind vnk, ValueNum map, FieldSeqNode* fieldSeq, ValueNum elem, var_types indType, BasicBlock* block)
@@ -2712,17 +2713,17 @@ ValueNum ValueNumStore::ExtendPtrVN(GenTreePtr opA, FieldSeqNode* fldSeq)
return res;
}
-void Compiler::fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
- ValueNum arrVN,
- ValueNum inxVN,
- FieldSeqNode* fldSeq,
- ValueNum rhsVN,
- var_types indType)
+ValueNum Compiler::fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
+ ValueNum arrVN,
+ ValueNum inxVN,
+ FieldSeqNode* fldSeq,
+ ValueNum rhsVN,
+ var_types indType)
{
bool invalidateArray = false;
ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
var_types arrElemType = DecodeElemType(elemTypeEq);
- ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurHeapVN, elemTypeEqVN);
+ ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurMemoryVN[GcHeap], elemTypeEqVN);
ValueNum hAtArrTypeAtArr = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, hAtArrType, arrVN);
ValueNum hAtArrTypeAtArrAtInx = vnStore->VNForMapSelect(VNK_Liberal, arrElemType, hAtArrTypeAtArr, inxVN);
@@ -2779,7 +2780,7 @@ void Compiler::fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
#ifdef DEBUG
if (verbose)
{
- printf(" hAtArrType " STR_VN "%x is MapSelect(curHeap(" STR_VN "%x), ", hAtArrType, fgCurHeapVN);
+ printf(" hAtArrType " STR_VN "%x is MapSelect(curGcHeap(" STR_VN "%x), ", hAtArrType, fgCurMemoryVN[GcHeap]);
if (arrElemType == TYP_STRUCT)
{
@@ -2809,14 +2810,11 @@ void Compiler::fgValueNumberArrIndexAssign(CORINFO_CLASS_HANDLE elemTypeEq,
vnStore->vnDump(this, newValAtArrType);
printf("\n");
- printf(" fgCurHeapVN assigned:\n");
+ printf(" fgCurMemoryVN assigned:\n");
}
#endif // DEBUG
- // bbHeapDef must be set to true for any block that Mutates the global Heap
- assert(compCurBB->bbHeapDef);
-
- fgCurHeapVN = vnStore->VNForMapStore(TYP_REF, fgCurHeapVN, elemTypeEqVN, newValAtArrType);
+ return vnStore->VNForMapStore(TYP_REF, fgCurMemoryVN[GcHeap], elemTypeEqVN, newValAtArrType);
}
ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree, VNFuncApp* pFuncApp, ValueNum addrXvn)
@@ -2869,14 +2867,15 @@ ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree,
else
{
ValueNum elemTypeEqVN = vnStore->VNForHandle(ssize_t(elemTypeEq), GTF_ICON_CLASS_HDL);
- ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurHeapVN, elemTypeEqVN);
+ ValueNum hAtArrType = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, fgCurMemoryVN[GcHeap], elemTypeEqVN);
ValueNum hAtArrTypeAtArr = vnStore->VNForMapSelect(VNK_Liberal, TYP_REF, hAtArrType, arrVN);
ValueNum wholeElem = vnStore->VNForMapSelect(VNK_Liberal, elemTyp, hAtArrTypeAtArr, inxVN);
#ifdef DEBUG
if (verbose)
{
- printf(" hAtArrType " STR_VN "%x is MapSelect(curHeap(" STR_VN "%x), ", hAtArrType, fgCurHeapVN);
+ printf(" hAtArrType " STR_VN "%x is MapSelect(curGcHeap(" STR_VN "%x), ", hAtArrType,
+ fgCurMemoryVN[GcHeap]);
if (elemTyp == TYP_STRUCT)
{
printf("%s[]).\n", eeGetClassName(elemTypeEq));
@@ -2923,6 +2922,17 @@ ValueNum Compiler::fgValueNumberArrIndexVal(GenTreePtr tree,
return selectedElem;
}
+ValueNum Compiler::fgValueNumberByrefExposedLoad(var_types type, ValueNum pointerVN)
+{
+ ValueNum memoryVN = fgCurMemoryVN[ByrefExposed];
+ // The memoization for VNFunc applications does not factor in the result type, so
+ // VNF_ByrefExposedLoad takes the loaded type as an explicit parameter.
+ ValueNum typeVN = vnStore->VNForIntCon(type);
+ ValueNum loadVN = vnStore->VNForFunc(type, VNF_ByrefExposedLoad, typeVN, vnStore->VNNormVal(pointerVN), memoryVN);
+
+ return loadVN;
+}
+
var_types ValueNumStore::TypeOfVN(ValueNum vn)
{
if (vn == NoVN)
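
The comment in fgValueNumberByrefExposedLoad above notes that VNFunc memoization ignores the result type, which is why the loaded type is passed as an explicit argument. A toy sketch of that constraint (invented names, not the JIT's ValueNumStore):

    // Toy sketch: a function-application cache keyed only on (func, args) hands
    // back the same value number regardless of the requested result type, so the
    // load type must itself be one of the arguments to stay distinguishable.
    #include <cstdio>
    #include <map>
    #include <tuple>

    using ValueNum = unsigned;
    enum VNFunc { VNF_ByrefExposedLoad };

    static std::map<std::tuple<VNFunc, ValueNum, ValueNum, ValueNum>, ValueNum> cache;
    static ValueNum nextVN = 1;

    static ValueNum VNForFunc(VNFunc f, ValueNum a0, ValueNum a1, ValueNum a2)
    {
        auto key = std::make_tuple(f, a0, a1, a2);
        auto it  = cache.find(key);
        if (it != cache.end())
            return it->second;          // memoized: same args => same VN
        return cache[key] = nextVN++;
    }

    int main()
    {
        ValueNum ptrVN = 100, memVN = 200;
        ValueNum typeInt = 7, typeFloat = 11; // stand-ins for VNForIntCon(type)
        // Same pointer and memory state, different load types => distinct VNs
        // only because the type participates in the argument list.
        ValueNum intLoad   = VNForFunc(VNF_ByrefExposedLoad, typeInt,   ptrVN, memVN);
        ValueNum floatLoad = VNForFunc(VNF_ByrefExposedLoad, typeFloat, ptrVN, memVN);
        printf("int load VN=%u, float load VN=%u\n", intLoad, floatLoad);
        return 0;
    }
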
@@ -4200,10 +4210,10 @@ void Compiler::fgValueNumber()
else
{
ValueNumPair noVnp;
- // Make sure the heap SSA names have no value numbers.
- for (unsigned i = 0; i < lvHeapNumSsaNames; i++)
+ // Make sure the memory SSA names have no value numbers.
+ for (unsigned i = 0; i < lvMemoryNumSsaNames; i++)
{
- lvHeapPerSsaData.GetRef(i).m_vnPair = noVnp;
+ lvMemoryPerSsaData.GetRef(i).m_vnPair = noVnp;
}
for (BasicBlock* blk = fgFirstBB; blk != nullptr; blk = blk->bbNext)
{
@@ -4309,13 +4319,13 @@ void Compiler::fgValueNumber()
ssaDef->m_defLoc.m_blk = fgFirstBB;
}
}
- // Give "Heap" an initial value number (about which we know nothing).
- ValueNum heapInitVal = vnStore->VNForFunc(TYP_REF, VNF_InitVal, vnStore->VNForIntCon(-1)); // Use -1 for the heap.
- GetHeapPerSsaData(SsaConfig::FIRST_SSA_NUM)->m_vnPair.SetBoth(heapInitVal);
+ // Give memory an initial value number (about which we know nothing).
+ ValueNum memoryInitVal = vnStore->VNForFunc(TYP_REF, VNF_InitVal, vnStore->VNForIntCon(-1)); // Use -1 for memory.
+ GetMemoryPerSsaData(SsaConfig::FIRST_SSA_NUM)->m_vnPair.SetBoth(memoryInitVal);
#ifdef DEBUG
if (verbose)
{
- printf("Heap Initial Value in BB01 is: " STR_VN "%x\n", heapInitVal);
+ printf("Memory Initial Value in BB01 is: " STR_VN "%x\n", memoryInitVal);
}
#endif // DEBUG
@@ -4329,7 +4339,7 @@ void Compiler::fgValueNumber()
while (vs.m_toDoAllPredsDone.Size() > 0)
{
BasicBlock* toDo = vs.m_toDoAllPredsDone.Pop();
- fgValueNumberBlock(toDo, /*newVNsForPhis*/ false);
+ fgValueNumberBlock(toDo);
// Record that we've visited "toDo", and add successors to the right sets.
vs.FinishVisit(toDo);
}
@@ -4344,7 +4354,7 @@ void Compiler::fgValueNumber()
continue; // We may have run out, because of completed blocks on the not-all-preds done list.
}
- fgValueNumberBlock(toDo, /*newVNsForPhis*/ true);
+ fgValueNumberBlock(toDo);
 // Record that we've visited "toDo", and add successors to the right sets.
vs.FinishVisit(toDo);
}
@@ -4357,7 +4367,7 @@ void Compiler::fgValueNumber()
fgVNPassesCompleted++;
}
-void Compiler::fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis)
+void Compiler::fgValueNumberBlock(BasicBlock* blk)
{
compCurBB = blk;
@@ -4488,75 +4498,93 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis)
}
}
- // Now do the same for "Heap".
- // Is there a phi for this block?
- if (blk->bbHeapSsaPhiFunc == nullptr)
+ // Now do the same for each MemoryKind.
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- fgCurHeapVN = GetHeapPerSsaData(blk->bbHeapSsaNumIn)->m_vnPair.GetLiberal();
- assert(fgCurHeapVN != ValueNumStore::NoVN);
- }
- else
- {
- unsigned loopNum;
- ValueNum newHeapVN;
- if (optBlockIsLoopEntry(blk, &loopNum))
+ // Is there a phi for this block?
+ if (blk->bbMemorySsaPhiFunc[memoryKind] == nullptr)
{
- newHeapVN = fgHeapVNForLoopSideEffects(blk, loopNum);
+ fgCurMemoryVN[memoryKind] = GetMemoryPerSsaData(blk->bbMemorySsaNumIn[memoryKind])->m_vnPair.GetLiberal();
+ assert(fgCurMemoryVN[memoryKind] != ValueNumStore::NoVN);
}
else
{
- // Are all the VN's the same?
- BasicBlock::HeapPhiArg* phiArgs = blk->bbHeapSsaPhiFunc;
- assert(phiArgs != BasicBlock::EmptyHeapPhiDef);
- // There should be > 1 args to a phi.
- assert(phiArgs->m_nextArg != nullptr);
- ValueNum phiAppVN = vnStore->VNForIntCon(phiArgs->GetSsaNum());
- JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiAppVN, phiArgs->GetSsaNum());
- bool allSame = true;
- ValueNum sameVN = GetHeapPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
- if (sameVN == ValueNumStore::NoVN)
+ if ((memoryKind == ByrefExposed) && byrefStatesMatchGcHeapStates)
{
- allSame = false;
+ // The update for GcHeap will copy its result to ByrefExposed.
+ assert(memoryKind < GcHeap);
+ assert(blk->bbMemorySsaPhiFunc[memoryKind] == blk->bbMemorySsaPhiFunc[GcHeap]);
+ continue;
}
- phiArgs = phiArgs->m_nextArg;
- while (phiArgs != nullptr)
+
+ unsigned loopNum;
+ ValueNum newMemoryVN;
+ if (optBlockIsLoopEntry(blk, &loopNum))
{
- ValueNum phiArgVN = GetHeapPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
- if (phiArgVN == ValueNumStore::NoVN || phiArgVN != sameVN)
+ newMemoryVN = fgMemoryVNForLoopSideEffects(memoryKind, blk, loopNum);
+ }
+ else
+ {
+ // Are all the VN's the same?
+ BasicBlock::MemoryPhiArg* phiArgs = blk->bbMemorySsaPhiFunc[memoryKind];
+ assert(phiArgs != BasicBlock::EmptyMemoryPhiDef);
+ // There should be > 1 args to a phi.
+ assert(phiArgs->m_nextArg != nullptr);
+ ValueNum phiAppVN = vnStore->VNForIntCon(phiArgs->GetSsaNum());
+ JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiAppVN, phiArgs->GetSsaNum());
+ bool allSame = true;
+ ValueNum sameVN = GetMemoryPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
+ if (sameVN == ValueNumStore::NoVN)
{
allSame = false;
}
+ phiArgs = phiArgs->m_nextArg;
+ while (phiArgs != nullptr)
+ {
+ ValueNum phiArgVN = GetMemoryPerSsaData(phiArgs->GetSsaNum())->m_vnPair.GetLiberal();
+ if (phiArgVN == ValueNumStore::NoVN || phiArgVN != sameVN)
+ {
+ allSame = false;
+ }
#ifdef DEBUG
- ValueNum oldPhiAppVN = phiAppVN;
+ ValueNum oldPhiAppVN = phiAppVN;
#endif
- unsigned phiArgSSANum = phiArgs->GetSsaNum();
- ValueNum phiArgSSANumVN = vnStore->VNForIntCon(phiArgSSANum);
- JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiArgSSANumVN, phiArgSSANum);
- phiAppVN = vnStore->VNForFunc(TYP_REF, VNF_Phi, phiArgSSANumVN, phiAppVN);
- JITDUMP(" Building phi application: $%x = phi($%x, $%x).\n", phiAppVN, phiArgSSANumVN, oldPhiAppVN);
- phiArgs = phiArgs->m_nextArg;
- }
- if (allSame)
- {
- newHeapVN = sameVN;
+ unsigned phiArgSSANum = phiArgs->GetSsaNum();
+ ValueNum phiArgSSANumVN = vnStore->VNForIntCon(phiArgSSANum);
+ JITDUMP(" Building phi application: $%x = SSA# %d.\n", phiArgSSANumVN, phiArgSSANum);
+ phiAppVN = vnStore->VNForFunc(TYP_REF, VNF_Phi, phiArgSSANumVN, phiAppVN);
+ JITDUMP(" Building phi application: $%x = phi($%x, $%x).\n", phiAppVN, phiArgSSANumVN,
+ oldPhiAppVN);
+ phiArgs = phiArgs->m_nextArg;
+ }
+ if (allSame)
+ {
+ newMemoryVN = sameVN;
+ }
+ else
+ {
+ newMemoryVN =
+ vnStore->VNForFunc(TYP_REF, VNF_PhiMemoryDef, vnStore->VNForHandle(ssize_t(blk), 0), phiAppVN);
+ }
}
- else
+ GetMemoryPerSsaData(blk->bbMemorySsaNumIn[memoryKind])->m_vnPair.SetLiberal(newMemoryVN);
+ fgCurMemoryVN[memoryKind] = newMemoryVN;
+ if ((memoryKind == GcHeap) && byrefStatesMatchGcHeapStates)
{
- newHeapVN =
- vnStore->VNForFunc(TYP_REF, VNF_PhiHeapDef, vnStore->VNForHandle(ssize_t(blk), 0), phiAppVN);
+ // Keep the CurMemoryVNs in sync
+ fgCurMemoryVN[ByrefExposed] = newMemoryVN;
}
}
- GetHeapPerSsaData(blk->bbHeapSsaNumIn)->m_vnPair.SetLiberal(newHeapVN);
- fgCurHeapVN = newHeapVN;
- }
#ifdef DEBUG
- if (verbose)
- {
- printf("The SSA definition for heap (#%d) at start of BB%02u is ", blk->bbHeapSsaNumIn, blk->bbNum);
- vnPrint(fgCurHeapVN, 1);
- printf("\n");
- }
+ if (verbose)
+ {
+ printf("The SSA definition for %s (#%d) at start of BB%02u is ", memoryKindNames[memoryKind],
+ blk->bbMemorySsaNumIn[memoryKind], blk->bbNum);
+ vnPrint(fgCurMemoryVN[memoryKind], 1);
+ printf("\n");
+ }
#endif // DEBUG
+ }
// Now iterate over the remaining statements, and their trees.
for (GenTreePtr stmt = firstNonPhi; stmt != nullptr; stmt = stmt->gtNext)
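
A compact sketch of the merge rule implemented by the new per-memory-kind loop above: reuse the common VN when every phi argument agrees, otherwise fall back to a fresh, opaque definition. Helper names here are invented; freshVN() stands in for building a VNF_PhiMemoryDef application.

    #include <cstdio>
    #include <vector>

    using ValueNum = unsigned;
    constexpr ValueNum NoVN = 0;

    static ValueNum freshVN()
    {
        static ValueNum next = 1000;
        return next++; // stand-in for VNForFunc(VNF_PhiMemoryDef, blockVN, phiAppVN)
    }

    static ValueNum foldMemoryPhi(const std::vector<ValueNum>& phiArgVNs)
    {
        ValueNum sameVN  = phiArgVNs.front();
        bool     allSame = (sameVN != NoVN);
        for (ValueNum vn : phiArgVNs)
            if (vn == NoVN || vn != sameVN)
                allSame = false;
        return allSame ? sameVN : freshVN();
    }

    int main()
    {
        printf("all-same  -> %u\n", foldMemoryPhi({42, 42, 42})); // reuses 42
        printf("differing -> %u\n", foldMemoryPhi({42, 43}));     // fresh, opaque VN
        return 0;
    }
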
@@ -4592,15 +4620,30 @@ void Compiler::fgValueNumberBlock(BasicBlock* blk, bool newVNsForPhis)
#endif
}
- if (blk->bbHeapSsaNumOut != blk->bbHeapSsaNumIn)
+ for (MemoryKind memoryKind : allMemoryKinds())
{
- GetHeapPerSsaData(blk->bbHeapSsaNumOut)->m_vnPair.SetLiberal(fgCurHeapVN);
+ if ((memoryKind == GcHeap) && byrefStatesMatchGcHeapStates)
+ {
+ // The update to the shared SSA data will have already happened for ByrefExposed.
+ assert(memoryKind > ByrefExposed);
+ assert(blk->bbMemorySsaNumOut[memoryKind] == blk->bbMemorySsaNumOut[ByrefExposed]);
+ assert(GetMemoryPerSsaData(blk->bbMemorySsaNumOut[memoryKind])->m_vnPair.GetLiberal() ==
+ fgCurMemoryVN[memoryKind]);
+ continue;
+ }
+
+ if (blk->bbMemorySsaNumOut[memoryKind] != blk->bbMemorySsaNumIn[memoryKind])
+ {
+ GetMemoryPerSsaData(blk->bbMemorySsaNumOut[memoryKind])->m_vnPair.SetLiberal(fgCurMemoryVN[memoryKind]);
+ }
}
compCurBB = nullptr;
}
-ValueNum Compiler::fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned innermostLoopNum)
+ValueNum Compiler::fgMemoryVNForLoopSideEffects(MemoryKind memoryKind,
+ BasicBlock* entryBlock,
+ unsigned innermostLoopNum)
{
// "loopNum" is the innermost loop for which "blk" is the entry; find the outermost one.
assert(innermostLoopNum != BasicBlock::NOT_IN_LOOP);
@@ -4619,27 +4662,27 @@ ValueNum Compiler::fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned i
#ifdef DEBUG
if (verbose)
{
- printf("Computing heap state for block BB%02u, entry block for loops %d to %d:\n", entryBlock->bbNum,
- innermostLoopNum, loopNum);
+ printf("Computing %s state for block BB%02u, entry block for loops %d to %d:\n", memoryKindNames[memoryKind],
+ entryBlock->bbNum, innermostLoopNum, loopNum);
}
#endif // DEBUG
- // If this loop has heap havoc effects, just use a new, unique VN.
- if (optLoopTable[loopNum].lpLoopHasHeapHavoc)
+ // If this loop has memory havoc effects, just use a new, unique VN.
+ if (optLoopTable[loopNum].lpLoopHasMemoryHavoc[memoryKind])
{
ValueNum res = vnStore->VNForExpr(entryBlock, TYP_REF);
#ifdef DEBUG
if (verbose)
{
- printf(" Loop %d has heap havoc effect; heap state is new fresh $%x.\n", loopNum, res);
+ printf(" Loop %d has memory havoc effect; memory state is new, fresh $%x.\n", loopNum, res);
}
#endif // DEBUG
return res;
}
// Otherwise, find the predecessors of the entry block that are not in the loop.
- // If there is only one such, use its heap value as the "base." If more than one,
- // use a new unique heap VN.
+ // If there is only one such, use its memory value as the "base." If more than one,
+ // use a new unique VN.
BasicBlock* nonLoopPred = nullptr;
bool multipleNonLoopPreds = false;
for (flowList* pred = BlockPredsWithEH(entryBlock); pred != nullptr; pred = pred->flNext)
@@ -4671,122 +4714,187 @@ ValueNum Compiler::fgHeapVNForLoopSideEffects(BasicBlock* entryBlock, unsigned i
#ifdef DEBUG
if (verbose)
{
- printf(" Therefore, heap state is new, fresh $%x.\n", res);
+ printf(" Therefore, memory state is new, fresh $%x.\n", res);
}
#endif // DEBUG
return res;
}
// Otherwise, there is a single non-loop pred.
assert(nonLoopPred != nullptr);
- // What is it's heap post-state?
- ValueNum newHeapVN = GetHeapPerSsaData(nonLoopPred->bbHeapSsaNumOut)->m_vnPair.GetLiberal();
- assert(newHeapVN !=
+ // What is its memory post-state?
+ ValueNum newMemoryVN = GetMemoryPerSsaData(nonLoopPred->bbMemorySsaNumOut[memoryKind])->m_vnPair.GetLiberal();
+ assert(newMemoryVN !=
ValueNumStore::NoVN); // We must have processed the single non-loop pred before reaching the loop entry.
#ifdef DEBUG
if (verbose)
{
- printf(" Init heap state is $%x, with new, fresh VN at:\n", newHeapVN);
+ printf(" Init %s state is $%x, with new, fresh VN at:\n", memoryKindNames[memoryKind], newMemoryVN);
}
#endif // DEBUG
// Modify "base" by setting all the modified fields/field maps/array maps to unknown values.
- // First the fields/field maps.
-
- Compiler::LoopDsc::FieldHandleSet* fieldsMod = optLoopTable[loopNum].lpFieldsModified;
- if (fieldsMod != nullptr)
+ // These annotations apply specifically to the GcHeap, where we disambiguate across such stores.
+ if (memoryKind == GcHeap)
{
- for (Compiler::LoopDsc::FieldHandleSet::KeyIterator ki = fieldsMod->Begin(); !ki.Equal(fieldsMod->End()); ++ki)
+ // First the fields/field maps.
+ Compiler::LoopDsc::FieldHandleSet* fieldsMod = optLoopTable[loopNum].lpFieldsModified;
+ if (fieldsMod != nullptr)
{
- CORINFO_FIELD_HANDLE fldHnd = ki.Get();
- ValueNum fldHndVN = vnStore->VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
+ for (Compiler::LoopDsc::FieldHandleSet::KeyIterator ki = fieldsMod->Begin(); !ki.Equal(fieldsMod->End());
+ ++ki)
+ {
+ CORINFO_FIELD_HANDLE fldHnd = ki.Get();
+ ValueNum fldHndVN = vnStore->VNForHandle(ssize_t(fldHnd), GTF_ICON_FIELD_HDL);
#ifdef DEBUG
- if (verbose)
- {
- const char* modName;
- const char* fldName = eeGetFieldName(fldHnd, &modName);
- printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x\n", fldName, fldHndVN);
+ if (verbose)
+ {
+ const char* modName;
+ const char* fldName = eeGetFieldName(fldHnd, &modName);
+ printf(" VNForHandle(Fseq[%s]) is " STR_VN "%x\n", fldName, fldHndVN);
- printf(" fgCurHeapVN assigned:\n");
- }
+ printf(" fgCurMemoryVN assigned:\n");
+ }
#endif // DEBUG
- newHeapVN = vnStore->VNForMapStore(TYP_REF, newHeapVN, fldHndVN, vnStore->VNForExpr(entryBlock, TYP_REF));
+ newMemoryVN =
+ vnStore->VNForMapStore(TYP_REF, newMemoryVN, fldHndVN, vnStore->VNForExpr(entryBlock, TYP_REF));
+ }
}
- }
- // Now do the array maps.
- Compiler::LoopDsc::ClassHandleSet* elemTypesMod = optLoopTable[loopNum].lpArrayElemTypesModified;
- if (elemTypesMod != nullptr)
- {
- for (Compiler::LoopDsc::ClassHandleSet::KeyIterator ki = elemTypesMod->Begin(); !ki.Equal(elemTypesMod->End());
- ++ki)
+ // Now do the array maps.
+ Compiler::LoopDsc::ClassHandleSet* elemTypesMod = optLoopTable[loopNum].lpArrayElemTypesModified;
+ if (elemTypesMod != nullptr)
{
- CORINFO_CLASS_HANDLE elemClsHnd = ki.Get();
+ for (Compiler::LoopDsc::ClassHandleSet::KeyIterator ki = elemTypesMod->Begin();
+ !ki.Equal(elemTypesMod->End()); ++ki)
+ {
+ CORINFO_CLASS_HANDLE elemClsHnd = ki.Get();
#ifdef DEBUG
- if (verbose)
- {
- var_types elemTyp = DecodeElemType(elemClsHnd);
- if (varTypeIsStruct(elemTyp))
- {
- printf(" Array map %s[]\n", eeGetClassName(elemClsHnd));
- }
- else
+ if (verbose)
{
- printf(" Array map %s[]\n", varTypeName(elemTyp));
+ var_types elemTyp = DecodeElemType(elemClsHnd);
+ if (varTypeIsStruct(elemTyp))
+ {
+ printf(" Array map %s[]\n", eeGetClassName(elemClsHnd));
+ }
+ else
+ {
+ printf(" Array map %s[]\n", varTypeName(elemTyp));
+ }
+ printf(" fgCurMemoryVN assigned:\n");
}
- printf(" fgCurHeapVN assigned:\n");
- }
#endif // DEBUG
- ValueNum elemTypeVN = vnStore->VNForHandle(ssize_t(elemClsHnd), GTF_ICON_CLASS_HDL);
- ValueNum uniqueVN = vnStore->VNForExpr(entryBlock, TYP_REF);
- newHeapVN = vnStore->VNForMapStore(TYP_REF, newHeapVN, elemTypeVN, uniqueVN);
+ ValueNum elemTypeVN = vnStore->VNForHandle(ssize_t(elemClsHnd), GTF_ICON_CLASS_HDL);
+ ValueNum uniqueVN = vnStore->VNForExpr(entryBlock, TYP_REF);
+ newMemoryVN = vnStore->VNForMapStore(TYP_REF, newMemoryVN, elemTypeVN, uniqueVN);
+ }
}
}
+ else
+ {
+ // If there were any fields/elements modified, this should have been recorded as havoc
+ // for ByrefExposed.
+ assert(memoryKind == ByrefExposed);
+ assert((optLoopTable[loopNum].lpFieldsModified == nullptr) ||
+ optLoopTable[loopNum].lpLoopHasMemoryHavoc[memoryKind]);
+ assert((optLoopTable[loopNum].lpArrayElemTypesModified == nullptr) ||
+ optLoopTable[loopNum].lpLoopHasMemoryHavoc[memoryKind]);
+ }
#ifdef DEBUG
if (verbose)
{
- printf(" Final heap state is $%x.\n", newHeapVN);
+ printf(" Final %s state is $%x.\n", memoryKindNames[memoryKind], newMemoryVN);
}
#endif // DEBUG
- return newHeapVN;
+ return newMemoryVN;
+}
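
fgMemoryVNForLoopSideEffects above starts from the non-loop predecessor's memory VN and invalidates only what the loop is known to modify. A toy model of that selective invalidation follows; the field names and the flat map are invented, whereas the JIT uses chained map-store VNs.

    #include <cstdio>
    #include <map>
    #include <set>
    #include <string>

    using ValueNum = unsigned;

    static ValueNum freshVN()
    {
        static ValueNum next = 500;
        return next++;
    }

    int main()
    {
        // Memory modeled as field-handle -> VN (one level of the map-store chain).
        std::map<std::string, ValueNum> memoryAtPred = {{"Foo.x", 10}, {"Foo.y", 11}, {"Bar.z", 12}};
        std::set<std::string> fieldsModifiedInLoop = {"Foo.y"}; // from lpFieldsModified

        std::map<std::string, ValueNum> memoryAtLoopEntry = memoryAtPred;
        for (const std::string& fld : fieldsModifiedInLoop)
            memoryAtLoopEntry[fld] = freshVN();                 // unknown after the loop back-edge

        for (auto& kv : memoryAtLoopEntry)
            printf("%s -> $%u\n", kv.first.c_str(), kv.second); // Foo.x and Bar.z keep their VNs
        return 0;
    }
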
+
+void Compiler::fgMutateGcHeap(GenTreePtr tree DEBUGARG(const char* msg))
+{
+ // Update the current memory VN, and if we're tracking the heap SSA # caused by this node, record it.
+ recordGcHeapStore(tree, vnStore->VNForExpr(compCurBB, TYP_REF) DEBUGARG(msg));
}
-void Compiler::fgMutateHeap(GenTreePtr tree DEBUGARG(const char* msg))
+void Compiler::fgMutateAddressExposedLocal(GenTreePtr tree DEBUGARG(const char* msg))
{
- // bbHeapDef must be set to true for any block that Mutates the global Heap
- assert(compCurBB->bbHeapDef);
+ // Update the current ByrefExposed VN, and if we're tracking the memory SSA # caused by this node, record it.
+ recordAddressExposedLocalStore(tree, vnStore->VNForExpr(compCurBB) DEBUGARG(msg));
+}
- fgCurHeapVN = vnStore->VNForExpr(compCurBB, TYP_REF);
+void Compiler::recordGcHeapStore(GenTreePtr curTree, ValueNum gcHeapVN DEBUGARG(const char* msg))
+{
+ // bbMemoryDef must include GcHeap for any block that mutates the GC Heap
+ // and GC Heap mutations are also ByrefExposed mutations
+ assert((compCurBB->bbMemoryDef & memoryKindSet(GcHeap, ByrefExposed)) == memoryKindSet(GcHeap, ByrefExposed));
+ fgCurMemoryVN[GcHeap] = gcHeapVN;
- // If we're tracking the heap SSA # caused by this node, record it.
- fgValueNumberRecordHeapSsa(tree);
+ if (byrefStatesMatchGcHeapStates)
+ {
+ // Since GcHeap and ByrefExposed share SSA nodes, they need to share
+ // value numbers too.
+ fgCurMemoryVN[ByrefExposed] = gcHeapVN;
+ }
+ else
+ {
+ // GcHeap and ByrefExposed have different defnums and VNs. We conservatively
+ // assume that this GcHeap store may alias any byref load/store, so don't
+ // bother trying to record the map/select stuff, and instead just use an opaque VN
+ // for ByrefExposed.
+ fgCurMemoryVN[ByrefExposed] = vnStore->VNForExpr(compCurBB);
+ }
#ifdef DEBUG
if (verbose)
{
- printf(" fgCurHeapVN assigned by %s at ", msg);
- Compiler::printTreeID(tree);
- printf(" to new unique VN: " STR_VN "%x.\n", fgCurHeapVN);
+ printf(" fgCurMemoryVN[GcHeap] assigned by %s at ", msg);
+ Compiler::printTreeID(curTree);
+ printf(" to VN: " STR_VN "%x.\n", gcHeapVN);
}
#endif // DEBUG
+
+ // If byrefStatesMatchGcHeapStates is true, then since GcHeap and ByrefExposed share
+ // their SSA map entries, the below will effectively update both.
+ fgValueNumberRecordMemorySsa(GcHeap, curTree);
}
-void Compiler::fgValueNumberRecordHeapSsa(GenTreePtr tree)
+void Compiler::recordAddressExposedLocalStore(GenTreePtr curTree, ValueNum memoryVN DEBUGARG(const char* msg))
+{
+ // This should only happen if GcHeap and ByrefExposed are being tracked separately;
+ // otherwise we'd go through recordGcHeapStore.
+ assert(!byrefStatesMatchGcHeapStates);
+
+ // bbMemoryDef must include ByrefExposed for any block that mutates an address-exposed local
+ assert((compCurBB->bbMemoryDef & memoryKindSet(ByrefExposed)) != 0);
+ fgCurMemoryVN[ByrefExposed] = memoryVN;
+
+#ifdef DEBUG
+ if (verbose)
+ {
+ printf(" fgCurMemoryVN[ByrefExposed] assigned by %s at ", msg);
+ Compiler::printTreeID(curTree);
+ printf(" to VN: " STR_VN "%x.\n", memoryVN);
+ }
+#endif // DEBUG
+
+ fgValueNumberRecordMemorySsa(ByrefExposed, curTree);
+}
+
+void Compiler::fgValueNumberRecordMemorySsa(MemoryKind memoryKind, GenTreePtr tree)
{
unsigned ssaNum;
- if (GetHeapSsaMap()->Lookup(tree, &ssaNum))
+ if (GetMemorySsaMap(memoryKind)->Lookup(tree, &ssaNum))
{
- GetHeapPerSsaData(ssaNum)->m_vnPair.SetLiberal(fgCurHeapVN);
+ GetMemoryPerSsaData(ssaNum)->m_vnPair.SetLiberal(fgCurMemoryVN[memoryKind]);
#ifdef DEBUG
if (verbose)
{
printf("Node ");
Compiler::printTreeID(tree);
- printf(" sets heap SSA # %d to VN $%x: ", ssaNum, fgCurHeapVN);
- vnStore->vnDump(this, fgCurHeapVN);
+ printf(" sets %s SSA # %d to VN $%x: ", memoryKindNames[memoryKind], ssaNum, fgCurMemoryVN[memoryKind]);
+ vnStore->vnDump(this, fgCurMemoryVN[memoryKind]);
printf("\n");
}
#endif // DEBUG
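
The interplay between GcHeap and ByrefExposed in recordGcHeapStore above boils down to one rule, sketched here with an invented state struct: a GC-heap store always updates ByrefExposed as well, sharing the VN only when the two memory states are known to match, and otherwise giving ByrefExposed a fresh opaque VN.

    #include <cstdio>

    using ValueNum = unsigned;

    struct MemoryState
    {
        ValueNum gcHeapVN;
        ValueNum byrefExposedVN;
        bool     byrefStatesMatchGcHeapStates;
    };

    static ValueNum freshVN()
    {
        static ValueNum next = 900;
        return next++;
    }

    static void recordGcHeapStore(MemoryState& s, ValueNum newGcHeapVN)
    {
        s.gcHeapVN       = newGcHeapVN;
        s.byrefExposedVN = s.byrefStatesMatchGcHeapStates ? newGcHeapVN : freshVN();
    }

    int main()
    {
        MemoryState shared   = {1, 1, true};
        MemoryState separate = {1, 2, false};
        recordGcHeapStore(shared, 50);
        recordGcHeapStore(separate, 50);
        printf("shared:   GcHeap=$%u ByrefExposed=$%u\n", shared.gcHeapVN, shared.byrefExposedVN);
        printf("separate: GcHeap=$%u ByrefExposed=$%u\n", separate.gcHeapVN, separate.byrefExposedVN);
        return 0;
    }
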
@@ -4890,8 +4998,8 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
GenTree* lhs = tree->gtGetOp1();
GenTree* rhs = tree->gtGetOp2();
#ifdef DEBUG
- // Sometimes we query the heap ssa map, and need a dummy location for the ignored result.
- unsigned heapSsaNum;
+ // Sometimes we query the memory ssa map in an assertion, and need a dummy location for the ignored result.
+ unsigned memorySsaNum;
#endif
if (tree->OperIsInitBlkOp())
@@ -4902,8 +5010,8 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
if (tree->DefinesLocal(this, &lclVarTree, &isEntire))
{
assert(lclVarTree->gtFlags & GTF_VAR_DEF);
- // Should not have been recorded as updating the heap.
- assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+ // Should not have been recorded as updating the GC heap.
+ assert(!GetMemorySsaMap(GcHeap)->Lookup(tree, &memorySsaNum));
unsigned lclNum = lclVarTree->GetLclNum();
@@ -4911,6 +5019,9 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
// SSA names in which to store VN's on defs. We'll yield unique VN's when we read from them.
if (!fgExcludeFromSsa(lclNum))
{
+ // Should not have been recorded as updating ByrefExposed.
+ assert(!GetMemorySsaMap(ByrefExposed)->Lookup(tree, &memorySsaNum));
+
unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
ValueNum initBlkVN = ValueNumStore::NoVN;
@@ -4941,12 +5052,16 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
+ else if (lvaVarAddrExposed(lclVarTree->gtLclNum))
+ {
+ fgMutateAddressExposedLocal(tree DEBUGARG("INITBLK - address-exposed local"));
+ }
}
else
{
- // For now, arbitrary side effect on Heap.
+ // For now, arbitrary side effect on GcHeap/ByrefExposed.
// TODO-CQ: Why not be complete, and get this case right?
- fgMutateHeap(tree DEBUGARG("INITBLK - non local"));
+ fgMutateGcHeap(tree DEBUGARG("INITBLK - non local"));
}
// Initblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we
// want to be able to give VN's to.
@@ -4956,7 +5071,7 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
{
assert(tree->OperIsCopyBlkOp());
// TODO-Cleanup: We should factor things so that we uniformly rely on "PtrTo" VN's, and
- // the heap cases can be shared with assignments.
+ // the memory cases can be shared with assignments.
GenTreeLclVarCommon* lclVarTree = nullptr;
bool isEntire = false;
// Note that we don't care about exceptions here, since we're only using the values
@@ -4964,14 +5079,17 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
if (tree->DefinesLocal(this, &lclVarTree, &isEntire))
{
- // Should not have been recorded as updating the heap.
- assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+ // Should not have been recorded as updating the GC heap.
+ assert(!GetMemorySsaMap(GcHeap)->Lookup(tree, &memorySsaNum));
unsigned lhsLclNum = lclVarTree->GetLclNum();
FieldSeqNode* lhsFldSeq = nullptr;
// If it's excluded from SSA, don't need to do anything.
if (!fgExcludeFromSsa(lhsLclNum))
{
+ // Should not have been recorded as updating ByrefExposed.
+ assert(!GetMemorySsaMap(ByrefExposed)->Lookup(tree, &memorySsaNum));
+
unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclVarTree);
if (lhs->IsLocalExpr(this, &lclVarTree, &lhsFldSeq) ||
@@ -5082,10 +5200,10 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
if (fldSeqForStaticVar != FieldSeqStore::NotAField())
{
- // We model statics as indices into the heap variable.
+ // We model statics as indices into GcHeap (which is a subset of ByrefExposed).
ValueNum selectedStaticVar;
size_t structSize = 0;
- selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN,
+ selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurMemoryVN[GcHeap],
fldSeqForStaticVar, &structSize);
selectedStaticVar =
vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize);
@@ -5162,12 +5280,16 @@ void Compiler::fgValueNumberBlockAssignment(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
+ else if (lvaVarAddrExposed(lhsLclNum))
+ {
+ fgMutateAddressExposedLocal(tree DEBUGARG("COPYBLK - address-exposed local"));
+ }
}
else
{
- // For now, arbitrary side effect on Heap.
+ // For now, arbitrary side effect on GcHeap/ByrefExposed.
// TODO-CQ: Why not be complete, and get this case right?
- fgMutateHeap(tree DEBUGARG("COPYBLK - non local"));
+ fgMutateGcHeap(tree DEBUGARG("COPYBLK - non local"));
}
// Copyblock's are of type void. Give them the void "value" -- they may occur in argument lists, which we want
// to be able to give VN's to.
@@ -5223,8 +5345,22 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
if (lcl->gtSsaNum == SsaConfig::RESERVED_SSA_NUM)
{
- // Not an SSA variable. Assign each occurrence a new, unique, VN.
- lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ // Not an SSA variable.
+
+ if (lvaVarAddrExposed(lclNum))
+ {
+ // Address-exposed locals are part of ByrefExposed.
+ ValueNum addrVN = vnStore->VNForFunc(TYP_BYREF, VNF_PtrToLoc, vnStore->VNForIntCon(lclNum),
+ vnStore->VNForFieldSeq(nullptr));
+ ValueNum loadVN = fgValueNumberByrefExposedLoad(typ, addrVN);
+
+ lcl->gtVNPair.SetBoth(loadVN);
+ }
+ else
+ {
+ // Assign odd cases a new, unique, VN.
+ lcl->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lcl->TypeGet()));
+ }
}
else
{
@@ -5366,11 +5502,11 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
if (isVolatile)
{
- // For Volatile indirection, first mutate the global heap
- fgMutateHeap(tree DEBUGARG("GTF_FLD_VOLATILE - read"));
+ // For Volatile indirection, first mutate GcHeap/ByrefExposed
+ fgMutateGcHeap(tree DEBUGARG("GTF_FLD_VOLATILE - read"));
}
- // We just mutate the heap if isVolatile is true, and then do the read as normal.
+ // We just mutate GcHeap/ByrefExposed if isVolatile is true, and then do the read as normal.
//
// This allows:
// 1: read s;
@@ -5399,13 +5535,13 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
else
{
// This is a reference to heap memory.
- // We model statics as indices into the heap variable.
+ // We model statics as indices into GcHeap (which is a subset of ByrefExposed).
FieldSeqNode* fldSeqForStaticVar =
GetFieldSeqStore()->CreateSingleton(tree->gtClsVar.gtClsVarHnd);
size_t structSize = 0;
- selectedStaticVar =
- vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize);
+ selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurMemoryVN[GcHeap],
+ fldSeqForStaticVar, &structSize);
selectedStaticVar =
vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, tree->TypeGet(), structSize);
@@ -5429,8 +5565,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
break;
case GT_MEMORYBARRIER: // Leaf
- // For MEMORYBARRIER add an arbitrary side effect on Heap.
- fgMutateHeap(tree DEBUGARG("MEMORYBARRIER"));
+ // For MEMORYBARRIER add an arbitrary side effect on GcHeap/ByrefExposed.
+ fgMutateGcHeap(tree DEBUGARG("MEMORYBARRIER"));
break;
// These do not represent values.
@@ -5462,8 +5598,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
else if (GenTree::OperIsSimple(oper))
{
#ifdef DEBUG
- // Sometimes we query the heap ssa map, and need a dummy location for the ignored result.
- unsigned heapSsaNum;
+ // Sometimes we query the memory ssa map in an assertion, and need a dummy location for the ignored result.
+ unsigned memorySsaNum;
#endif
if (GenTree::OperIsAssignment(oper) && !varTypeIsStruct(tree))
@@ -5482,7 +5618,7 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
// If the LHS is an IND, we didn't evaluate it when we visited it previously.
// But we didn't know that the parent was an op=. We do now, so go back and evaluate it.
// (We actually check if the effective val is the IND. We will have evaluated any non-last
- // args of an LHS comma already -- including their heap effects.)
+ // args of an LHS comma already -- including their memory effects.)
GenTreePtr lhsVal = lhs->gtEffectiveVal(/*commaOnly*/ true);
if (lhsVal->OperIsIndir() || (lhsVal->OperGet() == GT_CLS_VAR))
{
@@ -5567,11 +5703,14 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
GenTreeLclVarCommon* lcl = lhs->AsLclVarCommon();
unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lcl);
- // Should not have been recorded as updating the heap.
- assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+ // Should not have been recorded as updating the GC heap.
+ assert(!GetMemorySsaMap(GcHeap)->Lookup(tree, &memorySsaNum));
if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM)
{
+ // Should not have been recorded as updating ByrefExposed mem.
+ assert(!GetMemorySsaMap(ByrefExposed)->Lookup(tree, &memorySsaNum));
+
assert(rhsVNPair.GetLiberal() != ValueNumStore::NoVN);
lhs->gtVNPair = rhsVNPair;
@@ -5591,6 +5730,16 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
+ else if (lvaVarAddrExposed(lcl->gtLclNum))
+ {
+ // We could use MapStore here and MapSelect on reads of address-exposed locals
+ // (using the local nums as selectors) to get e.g. propagation of values
+ // through address-taken locals in regions of code with no calls or byref
+ // writes.
+ // For now, just use a new opaque VN.
+ ValueNum heapVN = vnStore->VNForExpr(compCurBB);
+ recordAddressExposedLocalStore(tree, heapVN DEBUGARG("local assign"));
+ }
#ifdef DEBUG
else
{
@@ -5598,7 +5747,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
{
JITDUMP("Tree ");
Compiler::printTreeID(tree);
- printf(" assigns to local var V%02u; excluded from SSA, so value not tracked.\n",
+ printf(" assigns to non-address-taken local var V%02u; excluded from SSA, so value not "
+ "tracked.\n",
lcl->GetLclNum());
}
}
@@ -5610,8 +5760,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
GenTreeLclFld* lclFld = lhs->AsLclFld();
unsigned lclDefSsaNum = GetSsaNumForLocalVarDef(lclFld);
- // Should not have been recorded as updating the heap.
- assert(!GetHeapSsaMap()->Lookup(tree, &heapSsaNum));
+ // Should not have been recorded as updating the GC heap.
+ assert(!GetMemorySsaMap(GcHeap)->Lookup(tree, &memorySsaNum));
if (lclDefSsaNum != SsaConfig::RESERVED_SSA_NUM)
{
@@ -5664,6 +5814,15 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
+ else if (lvaVarAddrExposed(lclFld->gtLclNum))
+ {
+ // This side-effects ByrefExposed. Just use a new opaque VN.
+ // As with GT_LCL_VAR, we could probably use MapStore here and MapSelect at corresponding
+ // loads, but to do so would have to identify the subset of address-exposed locals
+ // whose fields can be disambiguated.
+ ValueNum heapVN = vnStore->VNForExpr(compCurBB);
+ recordAddressExposedLocalStore(tree, heapVN DEBUGARG("local field assign"));
+ }
}
break;
@@ -5678,8 +5837,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
if (isVolatile)
{
- // For Volatile store indirection, first mutate the global heap
- fgMutateHeap(lhs DEBUGARG("GTF_IND_VOLATILE - store"));
+ // For Volatile store indirection, first mutate GcHeap/ByrefExposed
+ fgMutateGcHeap(lhs DEBUGARG("GTF_IND_VOLATILE - store"));
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, lhs->TypeGet()));
}
@@ -5797,6 +5956,17 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
}
+ else if (lvaVarAddrExposed(lclNum))
+ {
+ // Need to record the effect on ByrefExposed.
+ // We could use MapStore here and MapSelect on reads of address-exposed locals
+ // (using the local nums as selectors) to get e.g. propagation of values
+ // through address-taken locals in regions of code with no calls or byref
+ // writes.
+ // For now, just use a new opaque VN.
+ ValueNum heapVN = vnStore->VNForExpr(compCurBB);
+ recordAddressExposedLocalStore(tree, heapVN DEBUGARG("PtrToLoc indir"));
+ }
}
}
@@ -5832,9 +6002,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
#endif // DEBUG
- fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq, rhsVNPair.GetLiberal(),
- lhs->TypeGet());
- fgValueNumberRecordHeapSsa(tree);
+ ValueNum heapVN = fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq,
+ rhsVNPair.GetLiberal(), lhs->TypeGet());
+ recordGcHeapStore(tree, heapVN DEBUGARG("Array element assignment"));
}
// It may be that we haven't parsed it yet. Try.
else if (lhs->gtFlags & GTF_IND_ARR_INDEX)
@@ -5851,7 +6021,7 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
arg->ParseArrayAddress(this, &arrInfo, &arr, &inxVN, &fldSeq);
if (arr == nullptr)
{
- fgMutateHeap(tree DEBUGARG("assignment to unparseable array expression"));
+ fgMutateGcHeap(tree DEBUGARG("assignment to unparseable array expression"));
return;
}
// Otherwise, parsing succeeded.
@@ -5869,15 +6039,15 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
fldSeq = GetFieldSeqStore()->Append(fldSeq, zeroOffsetFldSeq);
}
- fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq, rhsVNPair.GetLiberal(),
- lhs->TypeGet());
- fgValueNumberRecordHeapSsa(tree);
+ ValueNum heapVN = fgValueNumberArrIndexAssign(elemTypeEq, arrVN, inxVN, fldSeq,
+ rhsVNPair.GetLiberal(), lhs->TypeGet());
+ recordGcHeapStore(tree, heapVN DEBUGARG("assignment to unparseable array expression"));
}
else if (arg->IsFieldAddr(this, &obj, &staticOffset, &fldSeq))
{
if (fldSeq == FieldSeqStore::NotAField())
{
- fgMutateHeap(tree DEBUGARG("NotAField"));
+ fgMutateGcHeap(tree DEBUGARG("NotAField"));
}
else
{
@@ -5892,8 +6062,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
assert(staticOffset == nullptr);
}
#endif // DEBUG
- // Get the first (instance or static) field from field seq. Heap[field] will yield the
- // "field map".
+ // Get the first (instance or static) field from field seq. GcHeap[field] will yield
+ // the "field map".
if (fldSeq->IsFirstElemFieldSeq())
{
fldSeq = fldSeq->m_next;
@@ -5906,7 +6076,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
// The final field in the sequence will need to match the 'indType'
var_types indType = lhs->TypeGet();
- ValueNum fldMapVN = vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, firstFieldOnly);
+ ValueNum fldMapVN =
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurMemoryVN[GcHeap], firstFieldOnly);
// The type of the field is "struct" if there are more fields in the sequence,
// otherwise it is the type returned from VNApplySelectors above.
@@ -5962,8 +6133,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
storeVal, indType, compCurBB);
}
- newFldMapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, fldSeq,
- storeVal, indType, compCurBB);
+ newFldMapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurMemoryVN[GcHeap],
+ fldSeq, storeVal, indType, compCurBB);
}
// It is not strictly necessary to set the lhs value number,
@@ -5973,26 +6144,47 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
#ifdef DEBUG
if (verbose)
{
- printf(" fgCurHeapVN assigned:\n");
+ printf(" fgCurMemoryVN assigned:\n");
}
#endif // DEBUG
- // bbHeapDef must be set to true for any block that Mutates the global Heap
- assert(compCurBB->bbHeapDef);
+ // bbMemoryDef must include GcHeap for any block that mutates the GC heap
+ assert((compCurBB->bbMemoryDef & memoryKindSet(GcHeap)) != 0);
- // Update the field map for firstField in Heap to this new value.
- fgCurHeapVN = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, firstFieldOnly,
- newFldMapVN, indType, compCurBB);
+ // Update the field map for firstField in GcHeap to this new value.
+ ValueNum heapVN =
+ vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurMemoryVN[GcHeap], firstFieldOnly,
+ newFldMapVN, indType, compCurBB);
- fgValueNumberRecordHeapSsa(tree);
+ recordGcHeapStore(tree, heapVN DEBUGARG("StoreField"));
}
}
else
{
- GenTreeLclVarCommon* dummyLclVarTree = nullptr;
- if (!tree->DefinesLocal(this, &dummyLclVarTree))
+ GenTreeLclVarCommon* lclVarTree = nullptr;
+ bool isLocal = tree->DefinesLocal(this, &lclVarTree);
+
+ if (isLocal && lvaVarAddrExposed(lclVarTree->gtLclNum))
+ {
+ // Store to address-exposed local; need to record the effect on ByrefExposed.
+ // We could use MapStore here and MapSelect on reads of address-exposed locals
+ // (using the local nums as selectors) to get e.g. propagation of values
+ // through address-taken locals in regions of code with no calls or byref
+ // writes.
+ // For now, just use a new opaque VN.
+ ValueNum memoryVN = vnStore->VNForExpr(compCurBB);
+ recordAddressExposedLocalStore(tree, memoryVN DEBUGARG("PtrToLoc indir"));
+ }
+ else if (!isLocal)
{
- // If it doesn't define a local, then it might update the heap.
- fgMutateHeap(tree DEBUGARG("assign-of-IND"));
+ // If it doesn't define a local, then it might update GcHeap/ByrefExposed.
+ // For the new ByrefExposed VN, we could use an operator here like
+ // VNF_ByrefExposedStore that carries the VNs of the pointer and RHS, then
+ // at byref loads if the current ByrefExposed VN happens to be
+ // VNF_ByrefExposedStore with the same pointer VN, we could propagate the
+ // VN from the RHS to the VN for the load. This would e.g. allow tracking
+ // values through assignments to out params. For now, just model this
+ // as an opaque GcHeap/ByrefExposed mutation.
+ fgMutateGcHeap(tree DEBUGARG("assign-of-IND"));
}
}
}
@@ -6008,17 +6200,17 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
if (isVolatile)
{
- // For Volatile store indirection, first mutate the global heap
- fgMutateHeap(lhs DEBUGARG("GTF_CLS_VAR - store")); // always change fgCurHeapVN
+ // For Volatile store indirection, first mutate GcHeap/ByrefExposed
+ fgMutateGcHeap(lhs DEBUGARG("GTF_CLS_VAR - store")); // always change fgCurMemoryVN
}
- // We model statics as indices into the heap variable.
+ // We model statics as indices into GcHeap (which is a subset of ByrefExposed).
FieldSeqNode* fldSeqForStaticVar = GetFieldSeqStore()->CreateSingleton(lhs->gtClsVar.gtClsVarHnd);
assert(fldSeqForStaticVar != FieldSeqStore::NotAField());
ValueNum storeVal = rhsVNPair.GetLiberal(); // The value number from the rhs of the assignment
- storeVal = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, storeVal,
- lhs->TypeGet(), compCurBB);
+ storeVal = vnStore->VNApplySelectorsAssign(VNK_Liberal, fgCurMemoryVN[GcHeap], fldSeqForStaticVar,
+ storeVal, lhs->TypeGet(), compCurBB);
// It is not strictly necessary to set the lhs value number,
// but the dumps read better with it set to the 'storeVal' that we just computed
@@ -6026,23 +6218,22 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
#ifdef DEBUG
if (verbose)
{
- printf(" fgCurHeapVN assigned:\n");
+ printf(" fgCurMemoryVN assigned:\n");
}
#endif // DEBUG
- // bbHeapDef must be set to true for any block that Mutates the global Heap
- assert(compCurBB->bbHeapDef);
+ // bbMemoryDef must include GcHeap for any block that mutates the GC heap
+ assert((compCurBB->bbMemoryDef & memoryKindSet(GcHeap)) != 0);
- // Update the field map for the fgCurHeapVN
- fgCurHeapVN = storeVal;
- fgValueNumberRecordHeapSsa(tree);
+ // Update the field map for the fgCurMemoryVN and SSA for the tree
+ recordGcHeapStore(tree, storeVal DEBUGARG("Static Field store"));
}
break;
default:
assert(!"Unknown node for lhs of assignment!");
- // For Unknown stores, mutate the global heap
- fgMutateHeap(lhs DEBUGARG("Unkwown Assignment - store")); // always change fgCurHeapVN
+ // For Unknown stores, mutate GcHeap/ByrefExposed
+ fgMutateGcHeap(lhs DEBUGARG("Unknown Assignment - store")); // always change fgCurMemoryVN
break;
}
}
@@ -6123,7 +6314,9 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
else if ((oper == GT_IND) || GenTree::OperIsBlk(oper))
{
// So far, we handle cases in which the address is a ptr-to-local, or if it's
- // a pointer to an object field.
+ // a pointer to an object field or array element. Other cases become uses of
+ // the current ByrefExposed value and the pointer value, so that at least we
+ // can recognize redundant loads with no stores between them.
GenTreePtr addr = tree->AsIndir()->Addr();
GenTreeLclVarCommon* lclVarTree = nullptr;
FieldSeqNode* fldSeq1 = nullptr;
@@ -6150,8 +6343,8 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
else if (isVolatile)
{
- // For Volatile indirection, mutate the global heap
- fgMutateHeap(tree DEBUGARG("GTF_IND_VOLATILE - read"));
+ // For Volatile indirection, mutate GcHeap/ByrefExposed
+ fgMutateGcHeap(tree DEBUGARG("GTF_IND_VOLATILE - read"));
// The value read by the GT_IND can immediately change
ValueNum newUniq = vnStore->VNForExpr(compCurBB, tree->TypeGet());
@@ -6226,6 +6419,39 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
fgValueNumberArrIndexVal(tree, elemTypeEq, arrVN, inxVN, addrXvnp.GetLiberal(), fldSeq);
}
}
+ else if (tree->gtFlags & GTF_IND_ARR_LEN)
+ {
+ // It's an array length. The argument is the sum of an array ref with some integer values...
+ ValueNum arrRefLib = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetLiberal());
+ ValueNum arrRefCons = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetConservative());
+
+ assert(vnStore->TypeOfVN(arrRefLib) == TYP_REF || vnStore->TypeOfVN(arrRefLib) == TYP_BYREF);
+ if (vnStore->IsVNConstant(arrRefLib))
+ {
+ // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
+ tree->gtVNPair.SetLiberal(
+ vnStore->VNWithExc(ValueNumStore::VNForVoid(),
+ vnStore->VNExcSetSingleton(
+ vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefLib))));
+ }
+ else
+ {
+ tree->gtVNPair.SetLiberal(vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefLib));
+ }
+ assert(vnStore->TypeOfVN(arrRefCons) == TYP_REF || vnStore->TypeOfVN(arrRefCons) == TYP_BYREF);
+ if (vnStore->IsVNConstant(arrRefCons))
+ {
+ // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
+ tree->gtVNPair.SetConservative(
+ vnStore->VNWithExc(ValueNumStore::VNForVoid(),
+ vnStore->VNExcSetSingleton(
+ vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefCons))));
+ }
+ else
+ {
+ tree->gtVNPair.SetConservative(vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefCons));
+ }
+ }
// In general we skip GT_IND nodes on that are the LHS of an assignment. (We labeled these earlier.)
// We will "evaluate" this as part of the assignment. (Unless we're explicitly told by
@@ -6266,10 +6492,10 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
if (fldSeqForStaticVar != FieldSeqStore::NotAField())
{
ValueNum selectedStaticVar;
- // We model statics as indices into the heap variable.
+ // We model statics as indices into the GcHeap (which is a subset of ByrefExposed).
size_t structSize = 0;
- selectedStaticVar =
- vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, fldSeqForStaticVar, &structSize);
+ selectedStaticVar = vnStore->VNApplySelectors(VNK_Liberal, fgCurMemoryVN[GcHeap],
+ fldSeqForStaticVar, &structSize);
selectedStaticVar = vnStore->VNApplySelectorsTypeCheck(selectedStaticVar, indType, structSize);
tree->gtVNPair.SetLiberal(selectedStaticVar);
@@ -6294,7 +6520,7 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
}
else if (fldSeq2 != nullptr)
{
- // Get the first (instance or static) field from field seq. Heap[field] will yield the "field
+ // Get the first (instance or static) field from field seq. GcHeap[field] will yield the "field
// map".
CLANG_FORMAT_COMMENT_ANCHOR;
@@ -6313,7 +6539,7 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
FieldSeqNode* firstFieldOnly = GetFieldSeqStore()->CreateSingleton(fldSeq2->m_fieldHnd);
size_t structSize = 0;
ValueNum fldMapVN =
- vnStore->VNApplySelectors(VNK_Liberal, fgCurHeapVN, firstFieldOnly, &structSize);
+ vnStore->VNApplySelectors(VNK_Liberal, fgCurMemoryVN[GcHeap], firstFieldOnly, &structSize);
// The final field in the sequence will need to match the 'indType'
var_types indType = tree->TypeGet();
@@ -6357,9 +6583,12 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
}
}
- else // We don't know where the address points.
+ else // We don't know where the address points, so it is a ByrefExposed load.
{
- tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
+ ValueNum addrVN = addr->gtVNPair.GetLiberal();
+ ValueNum loadVN = fgValueNumberByrefExposedLoad(typ, addrVN);
+ tree->gtVNPair.SetLiberal(loadVN);
+ tree->gtVNPair.SetConservative(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
tree->gtVNPair = vnStore->VNPWithExc(tree->gtVNPair, addrXvnp);
}
}
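
The new fallback above gives unknown-address loads a liberal VN of the form ByrefExposedLoad(type, ptr, memory) rather than a fresh opaque VN. A toy sketch (a small cache keyed on the same three inputs, not the real VN store) shows why that lets redundant loads with no intervening byref store get equal VNs:

    #include <cstdio>
    #include <map>
    #include <tuple>

    using ValueNum = unsigned;

    static ValueNum loadVN(ValueNum typeVN, ValueNum ptrVN, ValueNum memoryVN)
    {
        static std::map<std::tuple<ValueNum, ValueNum, ValueNum>, ValueNum> cache;
        static ValueNum next = 1;
        auto key = std::make_tuple(typeVN, ptrVN, memoryVN);
        auto it  = cache.find(key);
        return (it != cache.end()) ? it->second : (cache[key] = next++);
    }

    int main()
    {
        ValueNum typeInt = 7, ptr = 100;
        ValueNum memBefore = 200, memAfterStore = 201;
        ValueNum l1 = loadVN(typeInt, ptr, memBefore);
        ValueNum l2 = loadVN(typeInt, ptr, memBefore);     // redundant load: same VN as l1
        ValueNum l3 = loadVN(typeInt, ptr, memAfterStore); // load after a byref store: new VN
        printf("l1=%u l2=%u l3=%u\n", l1, l2, l3);
        return 0;
    }
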
@@ -6506,62 +6735,16 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
tree->gtOp.gtOp1->gtVNPair)));
break;
- case GT_BLK:
- case GT_OBJ:
- case GT_IND:
- if (tree->gtFlags & GTF_IND_ARR_LEN)
- {
- // It's an array length. The argument is the sum of an array ref with some integer values...
- ValueNum arrRefLib = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetLiberal());
- ValueNum arrRefCons = vnStore->VNForRefInAddr(tree->gtOp.gtOp1->gtVNPair.GetConservative());
-
- assert(vnStore->TypeOfVN(arrRefLib) == TYP_REF || vnStore->TypeOfVN(arrRefLib) == TYP_BYREF);
- if (vnStore->IsVNConstant(arrRefLib))
- {
- // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
- tree->gtVNPair.SetLiberal(
- vnStore->VNWithExc(ValueNumStore::VNForVoid(),
- vnStore->VNExcSetSingleton(
- vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefLib))));
- }
- else
- {
- tree->gtVNPair.SetLiberal(vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefLib));
- }
- assert(vnStore->TypeOfVN(arrRefCons) == TYP_REF || vnStore->TypeOfVN(arrRefCons) == TYP_BYREF);
- if (vnStore->IsVNConstant(arrRefCons))
- {
- // (or in weird cases, a REF or BYREF constant, in which case the result is an exception).
- tree->gtVNPair.SetConservative(
- vnStore->VNWithExc(ValueNumStore::VNForVoid(),
- vnStore->VNExcSetSingleton(
- vnStore->VNForFunc(TYP_REF, VNF_NullPtrExc, arrRefCons))));
- }
- else
- {
- tree->gtVNPair.SetConservative(
- vnStore->VNForFunc(TYP_INT, VNFunc(GT_ARR_LENGTH), arrRefCons));
- }
- }
- else
- {
- tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
- }
- break;
-
case GT_LOCKADD: // Binop
case GT_XADD: // Binop
case GT_XCHG: // Binop
- // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap.
- fgMutateHeap(tree DEBUGARG("Interlocked intrinsic"));
+ // For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed.
+ fgMutateGcHeap(tree DEBUGARG("Interlocked intrinsic"));
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
break;
case GT_JTRUE:
case GT_LIST:
-#ifndef LEGACY_BACKEND
- case GT_FIELD_LIST:
-#endif // !LEGACY_BACKEND
// These nodes never need to have a ValueNumber
tree->gtVNPair.SetBoth(ValueNumStore::NoVN);
break;
@@ -6592,18 +6775,18 @@ void Compiler::fgValueNumberTree(GenTreePtr tree, bool evalAsgLhsInd)
// A bounds check node has no value, but may throw exceptions.
ValueNumPair excSet = vnStore->VNPExcSetSingleton(
vnStore->VNPairForFunc(TYP_REF, VNF_IndexOutOfRangeExc,
- vnStore->VNPNormVal(tree->AsBoundsChk()->gtArrLen->gtVNPair),
- vnStore->VNPNormVal(tree->AsBoundsChk()->gtIndex->gtVNPair)));
- excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtArrLen->gtVNPair));
+ vnStore->VNPNormVal(tree->AsBoundsChk()->gtIndex->gtVNPair),
+ vnStore->VNPNormVal(tree->AsBoundsChk()->gtArrLen->gtVNPair)));
excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtIndex->gtVNPair));
+ excSet = vnStore->VNPExcSetUnion(excSet, vnStore->VNPExcVal(tree->AsBoundsChk()->gtArrLen->gtVNPair));
tree->gtVNPair = vnStore->VNPWithExc(vnStore->VNPForVoid(), excSet);
}
break;
case GT_CMPXCHG: // Specialop
- // For CMPXCHG and other intrinsics add an arbitrary side effect on Heap.
- fgMutateHeap(tree DEBUGARG("Interlocked intrinsic"));
+ // For CMPXCHG and other intrinsics add an arbitrary side effect on GcHeap/ByrefExposed.
+ fgMutateGcHeap(tree DEBUGARG("Interlocked intrinsic"));
tree->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, tree->TypeGet()));
break;
@@ -6896,6 +7079,24 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
}
else
{
+ auto getCurrentArg = [call, &args, useEntryPointAddrAsArg0](int currentIndex) {
+ GenTreePtr arg = args->Current();
+ if ((arg->gtFlags & GTF_LATE_ARG) != 0)
+ {
+ // This arg is a setup node that moves the arg into position.
+ // Value-numbering will have visited the separate late arg that
+ // holds the actual value, and propagated/computed the value number
+ // for this arg there.
+ if (useEntryPointAddrAsArg0)
+ {
+ // The args in the fgArgInfo don't include the entry point, so
+ // index into them using one less than the requested index.
+ --currentIndex;
+ }
+ return call->fgArgInfo->GetLateArg(currentIndex);
+ }
+ return arg;
+ };
// Has at least one argument.
ValueNumPair vnp0;
ValueNumPair vnp0x = ValueNumStore::VNPForEmptyExcSet();
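
The off-by-one handling in the getCurrentArg lambda above is subtle; this hypothetical sketch (invented names and data, not the fgArgInfo API) shows the index shift when an entry-point argument is present in the visible argument list but absent from the late-arg table:

    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<const char*> visibleArgs  = {"entryPointAddr", "x", "y"};
        std::vector<const char*> lateArgTable = {"x", "y"}; // no entry-point entry
        bool useEntryPointAddrAsArg0 = true;

        int requested  = 2; // caller asks for visible arg #2, i.e. "y"
        int tableIndex = useEntryPointAddrAsArg0 ? requested - 1 : requested;
        printf("visible arg %d is \"%s\", late-arg table entry %d is \"%s\"\n",
               requested, visibleArgs[requested], tableIndex, lateArgTable[tableIndex]);
        return 0;
    }
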
@@ -6909,7 +7110,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
#endif
{
assert(!useEntryPointAddrAsArg0);
- ValueNumPair vnp0wx = args->Current()->gtVNPair;
+ ValueNumPair vnp0wx = getCurrentArg(0)->gtVNPair;
vnStore->VNPUnpackExc(vnp0wx, &vnp0, &vnp0x);
// Also include in the argument exception sets
@@ -6931,7 +7132,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
else
{
// Has at least two arguments.
- ValueNumPair vnp1wx = args->Current()->gtVNPair;
+ ValueNumPair vnp1wx = getCurrentArg(1)->gtVNPair;
ValueNumPair vnp1;
ValueNumPair vnp1x = ValueNumStore::VNPForEmptyExcSet();
vnStore->VNPUnpackExc(vnp1wx, &vnp1, &vnp1x);
@@ -6951,7 +7152,7 @@ void Compiler::fgValueNumberHelperCallFunc(GenTreeCall* call, VNFunc vnf, ValueN
}
else
{
- ValueNumPair vnp2wx = args->Current()->gtVNPair;
+ ValueNumPair vnp2wx = getCurrentArg(2)->gtVNPair;
ValueNumPair vnp2;
ValueNumPair vnp2x = ValueNumStore::VNPForEmptyExcSet();
vnStore->VNPUnpackExc(vnp2wx, &vnp2, &vnp2x);
@@ -6988,16 +7189,7 @@ void Compiler::fgValueNumberCall(GenTreeCall* call)
if (arg->OperGet() == GT_ARGPLACE)
{
// Find the corresponding late arg.
- GenTreePtr lateArg = nullptr;
- for (unsigned j = 0; j < call->fgArgInfo->ArgCount(); j++)
- {
- if (call->fgArgInfo->ArgTable()[j]->argNum == i)
- {
- lateArg = call->fgArgInfo->ArgTable()[j]->node;
- break;
- }
- }
- assert(lateArg != nullptr);
+ GenTreePtr lateArg = call->fgArgInfo->GetLateArg(i);
assert(lateArg->gtVNPair.BothDefined());
arg->gtVNPair = lateArg->gtVNPair;
updatedArgPlace = true;
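
The removed loop above is exactly what the new fgArgInfo::GetLateArg call centralizes: scan the arg table for the entry whose argNum matches and return its late node. A small stand-alone model of that helper, assuming a simple table of (argNum, node) pairs:

    #include <cassert>
    #include <cstdio>
    #include <vector>

    struct ArgEntry
    {
        unsigned argNum;   // position of the argument in the call
        int      lateNode; // stand-in for the GenTree* holding the real value
    };

    struct ArgInfoModel
    {
        std::vector<ArgEntry> table;

        // Equivalent of the removed linear search: find the entry whose
        // argNum matches and return its late node; assert if it is missing.
        int GetLateArg(unsigned argNum) const
        {
            for (const ArgEntry& e : table)
            {
                if (e.argNum == argNum)
                {
                    return e.lateNode;
                }
            }
            assert(!"late arg not found");
            return -1;
        }
    };

    int main()
    {
        ArgInfoModel info;
        info.table = {{0, 100}, {2, 102}, {1, 101}};
        std::printf("late node for arg 1: %d\n", info.GetLateArg(1));
        return 0;
    }
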
@@ -7028,8 +7220,8 @@ void Compiler::fgValueNumberCall(GenTreeCall* call)
if (modHeap)
{
- // For now, arbitrary side effect on Heap.
- fgMutateHeap(call DEBUGARG("HELPER - modifies heap"));
+ // For now, arbitrary side effect on GcHeap/ByrefExposed.
+ fgMutateGcHeap(call DEBUGARG("HELPER - modifies heap"));
}
}
else
@@ -7043,8 +7235,8 @@ void Compiler::fgValueNumberCall(GenTreeCall* call)
call->gtVNPair.SetBoth(vnStore->VNForExpr(compCurBB, call->TypeGet()));
}
- // For now, arbitrary side effect on Heap.
- fgMutateHeap(call DEBUGARG("CALL"));
+ // For now, arbitrary side effect on GcHeap/ByrefExposed.
+ fgMutateGcHeap(call DEBUGARG("CALL"));
}
}
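
Both call paths above end by conservatively invalidating memory via fgMutateGcHeap. A minimal model of what such an "arbitrary side effect" means for value numbering, assuming (as the renamed comments suggest) that both the GcHeap and ByrefExposed memory kinds receive fresh, opaque value numbers:

    #include <cstdio>

    struct MemoryStateModel
    {
        unsigned gcHeapVN       = 1; // stand-in for the current GcHeap VN
        unsigned byrefExposedVN = 1; // stand-in for the current ByrefExposed VN
        unsigned nextOpaqueVN   = 100;

        // Analogue of the mutation: both memory kinds get a brand-new VN, so any
        // load value-numbered after this point sees different memory and cannot be
        // CSE'd against a load numbered before it.
        void MutateGcHeap(const char* reason)
        {
            gcHeapVN       = nextOpaqueVN++;
            byrefExposedVN = nextOpaqueVN++;
            std::printf("memory mutated (%s): GcHeap=$%u ByrefExposed=$%u\n", reason, gcHeapVN, byrefExposedVN);
        }
    };

    int main()
    {
        MemoryStateModel mem;
        mem.MutateGcHeap("HELPER - modifies heap");
        mem.MutateGcHeap("CALL");
        return 0;
    }
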
diff --git a/src/jit/valuenum.h b/src/jit/valuenum.h
index c8a57ff210..e6e0e43a33 100644
--- a/src/jit/valuenum.h
+++ b/src/jit/valuenum.h
@@ -217,7 +217,7 @@ private:
#ifdef DEBUG
// This helps test some performance pathologies related to "evaluation" of VNF_MapSelect terms,
- // especially relating to the heap. We count the number of applications of such terms we consider,
+ // especially relating to GcHeap/ByrefExposed. We count the number of applications of such terms we consider,
// and if this exceeds a limit, indicated by a COMPlus_ variable, we assert.
unsigned m_numMapSels;
#endif
@@ -762,7 +762,7 @@ public:
// the function application it represents; otherwise, return "false."
bool GetVNFunc(ValueNum vn, VNFuncApp* funcApp);
- // Requires that "vn" represents a "heap address" the sum of a "TYP_REF" value and some integer
+ // Requires that "vn" represents a "GC heap address": the sum of a "TYP_REF" value and some integer
// value. Returns the TYP_REF value.
ValueNum VNForRefInAddr(ValueNum vn);
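
As the updated comment says, VNForRefInAddr expects a GC heap address formed as a TYP_REF value plus an integer offset and returns the TYP_REF part. A hypothetical sketch of that contract over a two-operand Add funcapp (the types and names below are made up):

    #include <cassert>
    #include <cstdio>

    enum ModelType
    {
        TYP_REF_MODEL,
        TYP_INT_MODEL
    };

    struct ModelVN
    {
        ModelType type;
        int       id;
    };

    struct AddFuncApp
    {
        ModelVN op0;
        ModelVN op1;
    };

    // Return whichever operand of the address sum is the TYP_REF value.
    static ModelVN RefInAddr(const AddFuncApp& addr)
    {
        if (addr.op0.type == TYP_REF_MODEL)
        {
            return addr.op0;
        }
        assert(addr.op1.type == TYP_REF_MODEL);
        return addr.op1;
    }

    int main()
    {
        AddFuncApp addr = {{TYP_INT_MODEL, 16}, {TYP_REF_MODEL, 42}};
        std::printf("ref VN id: %d\n", RefInAddr(addr).id);
        return 0;
    }
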
diff --git a/src/jit/valuenumfuncs.h b/src/jit/valuenumfuncs.h
index eb17aedf28..cb99507921 100644
--- a/src/jit/valuenumfuncs.h
+++ b/src/jit/valuenumfuncs.h
@@ -18,10 +18,10 @@ ValueNumFuncDef(PtrToLoc, 2, false, false, false) // Pointer (byref) t
ValueNumFuncDef(PtrToArrElem, 4, false, false, false) // Pointer (byref) to an array element. Args: 0: array elem type eq class var_types value, VN's of: 1: array, 2: index, 3: FieldSeq.
ValueNumFuncDef(PtrToStatic, 1, false, false, false) // Pointer (byref) to a static variable (or possibly a field thereof, if the static variable is a struct). Args: 0: FieldSeq, first element
// of which is the static var.
-ValueNumFuncDef(Phi, 2, false, false, false) // A phi function. Only occurs as arg of PhiDef or PhiHeapDef. Arguments are SSA numbers of var being defined.
-ValueNumFuncDef(PhiDef, 3, false, false, false) // Args: 0: local var # (or -1 for Heap), 1: SSA #, 2: VN of definition.
-// Wouldn't need this if I'd made Heap a regular local variable...
-ValueNumFuncDef(PhiHeapDef, 2, false, false, false) // Args: 0: VN for basic block pointer, 1: VN of definition
+ValueNumFuncDef(Phi, 2, false, false, false) // A phi function. Only occurs as arg of PhiDef or PhiMemoryDef. Arguments are SSA numbers of var being defined.
+ValueNumFuncDef(PhiDef, 3, false, false, false) // Args: 0: local var # (or -1 for memory), 1: SSA #, 2: VN of definition.
+// Wouldn't need this if I'd made memory a regular local variable...
+ValueNumFuncDef(PhiMemoryDef, 2, false, false, false) // Args: 0: VN for basic block pointer, 1: VN of definition
ValueNumFuncDef(InitVal, 1, false, false, false) // An input arg, or init val of a local. Args: 0: a constant VN.
@@ -37,6 +37,8 @@ ValueNumFuncDef(ReadyToRunIsInstanceOf, 2, false, false, false) // Args: 0
ValueNumFuncDef(LdElemA, 3, false, false, false) // Args: 0: array value; 1: index value; 2: type handle of element.
+ValueNumFuncDef(ByrefExposedLoad, 3, false, false, false) // Args: 0: type handle/id; 1: pointer value; 2: ByrefExposed heap value
+
ValueNumFuncDef(GetRefanyVal, 2, false, false, false) // Args: 0: type handle; 1: typedref value. Returns the value (asserting that the type is right).
ValueNumFuncDef(GetClassFromMethodParam, 1, false, true, false) // Args: 0: method generic argument.
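
valuenumfuncs.h is an X-macro style table: each ValueNumFuncDef(name, arity, ...) row is expanded by the including file into enum entries, arity tables, and so on. A self-contained illustration of that pattern with a made-up three-row table (the real rows, including the new PhiMemoryDef and ByrefExposedLoad, live in the header above):

    #include <cstdio>

    // A tiny stand-in for the real table; real entries carry more flags.
    #define DEMO_FUNC_DEFS(def)                                   \
        def(Phi, 2)          /* phi of SSA numbers            */  \
        def(PhiDef, 3)       /* local var #, SSA #, def VN    */  \
        def(PhiMemoryDef, 2) /* block pointer VN, def VN      */

    // Expansion 1: an enum of function kinds.
    #define DEF_ENUM(name, arity) VNF_##name,
    enum DemoVNFunc
    {
        DEMO_FUNC_DEFS(DEF_ENUM) VNF_Count
    };
    #undef DEF_ENUM

    // Expansion 2: a parallel arity table indexed by the enum.
    #define DEF_ARITY(name, arity) arity,
    static const unsigned s_arity[] = {DEMO_FUNC_DEFS(DEF_ARITY)};
    #undef DEF_ARITY

    int main()
    {
        std::printf("PhiMemoryDef takes %u args\n", s_arity[VNF_PhiMemoryDef]);
        return 0;
    }
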
diff --git a/src/jit/valuenumtype.h b/src/jit/valuenumtype.h
index f898d87532..b2ebba69c5 100644
--- a/src/jit/valuenumtype.h
+++ b/src/jit/valuenumtype.h
@@ -17,9 +17,9 @@
typedef UINT32 ValueNum;
// There are two "kinds" of value numbers, which differ in their modeling of the actions of other threads.
-// "Liberal" value numbers assume that the other threads change contents of heap locations only at
+// "Liberal" value numbers assume that the other threads change contents of memory locations only at
// synchronization points. Liberal VNs are appropriate, for example, in identifying CSE opportunities.
-// "Conservative" value numbers assume that the contents of heap locations change arbitrarily between
+// "Conservative" value numbers assume that the contents of memory locations change arbitrarily between
// every two accesses. Conservative VNs are appropriate, for example, in assertion prop, where an observation
// of a property of the value in some storage location is used to perform an optimization downstream on
// an operation involving the contents of that storage location. If other threads may modify the storage